!pip install pymysql
import pymysql
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Requirement already satisfied: pymysql in c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages (1.0.2)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
db = pymysql.connect(host='x',
port=1,
user='x',
password='x',
database='x')
cursor = db.cursor()
cursor.execute("select price, price_updated from raw_item_rs where en_name = %s", "Red Rebel ice pick")
all = cursor.fetchall()
from datetime import datetime
import time
all = [{ "price": x[0], "time": int(time.mktime(datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%fz").timetuple()))} for x in all]
print(len(all))
329
import json
with open("../data/RedRebel.json", "w") as f:
f.write(json.dumps(all))
f.close()
import matplotlib.pyplot as plt
import numpy as np
all.sort(key=lambda x: x['time'])
print(all[:10])
x = np.array([x['time'] for x in all])
y = np.array([x['price'] for x in all])
plt.plot(x, y)
plt.show()
[{'price': 2999999, 'time': 1656638268}, {'price': 4499999, 'time': 1656688384}, {'price': 5500000, 'time': 1656707919}, {'price': 6000000, 'time': 1656728553}, {'price': 5500000, 'time': 1656797161}, {'price': 4300000, 'time': 1656810858}, {'price': 3911111, 'time': 1656824533}, {'price': 3800000, 'time': 1656838167}, {'price': 3799999, 'time': 1656852073}, {'price': 5555555, 'time': 1656864556}]
import pandas as pd
def mean_norm(df_input): #@save
return df_input.apply(lambda x: (x-x.mean())/ x.std(), axis=0)
def de_mean_norm(result, df_input:pd.DataFrame): #@save
return result * df_input.std() + df_input.mean()
X = pd.DataFrame(x)
Y = pd.DataFrame(y)
print(X, Y)
0
0 1656638268
1 1656688384
2 1656707919
3 1656728553
4 1656797161
.. ...
324 1661098048
325 1661107841
326 1661120091
327 1661131513
328 1661151899
[329 rows x 1 columns] 0
0 2999999
1 4499999
2 5500000
3 6000000
4 5500000
.. ...
324 3840000
325 3900000
326 3942000
327 3555555
328 3958888
[329 rows x 1 columns]
X = mean_norm(X)
Y = mean_norm(Y)
print(X.info, Y.info)
<bound method DataFrame.info of 0
0 -1.826706
1 -1.787782
2 -1.772610
3 -1.756584
4 -1.703298
.. ...
324 1.637094
325 1.644700
326 1.654214
327 1.663085
328 1.678918
[329 rows x 1 columns]> <bound method DataFrame.info of 0
0 -1.917204
1 0.956663
2 2.872577
3 3.830532
4 2.872577
.. ...
324 -0.307836
325 -0.192882
326 -0.112413
327 -0.852808
328 -0.080057
[329 rows x 1 columns]>
import seaborn as sbn
all = pd.DataFrame(all)
all.info
<bound method DataFrame.info of price time
0 2999999 1656638268
1 4499999 1656688384
2 5500000 1656707919
3 6000000 1656728553
4 5500000 1656797161
.. ... ...
324 3840000 1661098048
325 3900000 1661107841
326 3942000 1661120091
327 3555555 1661131513
328 3958888 1661151899
[329 rows x 2 columns]>
sbn.scatterplot(x="time", y='price', data=all)
<AxesSubplot:xlabel='time', ylabel='price'>
## 回归网络
import tensorflow as tf
from d2l import tensorflow as d2l
net = tf.keras.Sequential()
# 添加一个连接层(Dense),输出标量数量为 1 ( w1x1 + w2x2 + b = y)
net.add(tf.keras.layers.Dense(units=10, input_dim=1, activation=tf.keras.activations.relu))
net.add(tf.keras.layers.Dense(units=1, input_dim=10, activation=tf.keras.activations.relu))
# 正态分布随机
initializer = tf.initializers.RandomNormal(stddev=0.1)
# 获取网络
# net = tf.keras.Sequential()
# 相当初始化一层,用来提供初始化数据
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))
# 批量随机梯度下降
# trainer = tf.keras.optimizers.SGD(learning_rate=0.0003)
sgd = tf.keras.optimizers.Adam(learning_rate=0.001)
net.compile(optimizer=sgd, loss=tf.keras.losses.MeanSquaredError())
history = net.fit(X, Y, batch_size=20, epochs=300, validation_split=0.2, callbacks=[])
net.summary()
Epoch 1/300
14/14 [==============================] - 0s 12ms/step - loss: 1.0883 - val_loss: 0.5398
Epoch 2/300
14/14 [==============================] - 0s 6ms/step - loss: 1.0816 - val_loss: 0.5425
Epoch 300/300
14/14 [==============================] - 0s 6ms/step - loss: 0.6169 - val_loss: 0.3916
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_6 (Dense) (None, 10) 20
dense_7 (Dense) (None, 1) 11
dense_8 (Dense) (None, 1) 2
=================================================================
Total params: 33
Trainable params: 33
Non-trainable params: 0
_________________________________________________________________
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.legend()
plt.show()
# future 3 day per hour
import datetime as dt
dateRange = pd.date_range(start=dt.datetime.strptime('2022082201', '%Y%m%d%H'), periods=24, freq='H')
testX = pd.DataFrame([int(d.timestamp()) for d in dateRange])
testX
<div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>0</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>1661130000</td> </tr> <tr> <th>1</th> <td>1661133600</td> </tr> <tr> <th>2</th> <td>1661137200</td> </tr> <tr> <th>3</th> <td>1661140800</td> </tr> <tr> <th>4</th> <td>1661144400</td> </tr> <tr> <th>5</th> <td>1661148000</td> </tr> <tr> <th>6</th> <td>1661151600</td> </tr> <tr> <th>7</th> <td>1661155200</td> </tr> <tr> <th>8</th> <td>1661158800</td> </tr> <tr> <th>9</th> <td>1661162400</td> </tr> <tr> <th>10</th> <td>1661166000</td> </tr> <tr> <th>11</th> <td>1661169600</td> </tr> <tr> <th>12</th> <td>1661173200</td> </tr> <tr> <th>13</th> <td>1661176800</td> </tr> <tr> <th>14</th> <td>1661180400</td> </tr> <tr> <th>15</th> <td>1661184000</td> </tr> <tr> <th>16</th> <td>1661187600</td> </tr> <tr> <th>17</th> <td>1661191200</td> </tr> <tr> <th>18</th> <td>1661194800</td> </tr> <tr> <th>19</th> <td>1661198400</td> </tr> <tr> <th>20</th> <td>1661202000</td> </tr> <tr> <th>21</th> <td>1661205600</td> </tr> <tr> <th>22</th> <td>1661209200</td> </tr> <tr> <th>23</th> <td>1661212800</td> </tr> </tbody> </table> </div>
testX1 = mean_norm(testX)
testX1
<div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>0</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>-1.626346</td> </tr> <tr> <th>1</th> <td>-1.484924</td> </tr> <tr> <th>2</th> <td>-1.343503</td> </tr> <tr> <th>3</th> <td>-1.202082</td> </tr> <tr> <th>4</th> <td>-1.060660</td> </tr> <tr> <th>5</th> <td>-0.919239</td> </tr> <tr> <th>6</th> <td>-0.777817</td> </tr> <tr> <th>7</th> <td>-0.636396</td> </tr> <tr> <th>8</th> <td>-0.494975</td> </tr> <tr> <th>9</th> <td>-0.353553</td> </tr> <tr> <th>10</th> <td>-0.212132</td> </tr> <tr> <th>11</th> <td>-0.070711</td> </tr> <tr> <th>12</th> <td>0.070711</td> </tr> <tr> <th>13</th> <td>0.212132</td> </tr> <tr> <th>14</th> <td>0.353553</td> </tr> <tr> <th>15</th> <td>0.494975</td> </tr> <tr> <th>16</th> <td>0.636396</td> </tr> <tr> <th>17</th> <td>0.777817</td> </tr> <tr> <th>18</th> <td>0.919239</td> </tr> <tr> <th>19</th> <td>1.060660</td> </tr> <tr> <th>20</th> <td>1.202082</td> </tr> <tr> <th>21</th> <td>1.343503</td> </tr> <tr> <th>22</th> <td>1.484924</td> </tr> <tr> <th>23</th> <td>1.626346</td> </tr> </tbody> </table> </div>
testY = net.predict(testX1)
testY
array([[ 1.6376313 ],
[ 1.4921811 ],
[ 1.3457209 ],
[ 0.8415806 ],
[ 0.2825816 ],
[-0.27449 ],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372],
[-0.28884372]], dtype=float32)
y1 = [yyy[0] for yyy in testY]
y1
[1.6376313,
1.4921811,
1.3457209,
0.8415806,
0.2825816,
-0.27449,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372,
-0.28884372]
y_de = [de_mean_norm(yyy, pd.DataFrame(y)) for yyy in y1]
for n in zip(y_de, testX.values):
print(type(n[1][0]))
print("%d --> %s" % (n[0], dt.datetime.fromtimestamp(n[1][0])))
<class 'numpy.int64'>
4855426 --> 2022-08-22 09:00:00
<class 'numpy.int64'>
4779509 --> 2022-08-22 10:00:00
<class 'numpy.int64'>
4703065 --> 2022-08-22 11:00:00
<class 'numpy.int64'>
4439932 --> 2022-08-22 12:00:00
<class 'numpy.int64'>
4148165 --> 2022-08-22 13:00:00
<class 'numpy.int64'>
3857404 --> 2022-08-22 14:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 15:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 16:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 17:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 18:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 19:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 20:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 21:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 22:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 23:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 00:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 01:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 02:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 03:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 04:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 05:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 06:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 07:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 08:00:00
plt.plot([dt.datetime.fromtimestamp(dd[0]) for dd in testX.values], [ '%d' % de for de in y_de])
[<matplotlib.lines.Line2D at 0x24d6f149e80>]