【调包侠的机器学习】塔科夫RR价格回归预测|DengQN.com

DengQN·一个普通程序员;

【调包侠的机器学习】塔科夫RR价格回归预测

2022-08-23 18:05 62

#回归#ml

!pip install pymysql
import pymysql

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Requirement already satisfied: pymysql in c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages (1.0.2)


WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)
WARNING: Ignoring invalid distribution -rotobuf (c:\users\rustynail\miniconda3\envs\d2l\lib\site-packages)

# Open a MySQL connection through pymysql.
# NOTE(review): the host/port/credential values look like redacted placeholders — confirm.
db = pymysql.connect(host='x',
                     port=1,
                     user='x',
                     password='x',
                     database='x')
cursor = db.cursor()
# Parameterized query: the item name is passed separately and bound to %s by the driver.
cursor.execute("select price, price_updated from raw_item_rs where en_name = %s", "Red Rebel ice pick")
# NOTE: the name `all` shadows the Python builtin all() from here on.
all = cursor.fetchall()

from datetime import datetime
import time
# Turn every (price, price_updated) row into a dict holding the price and an
# integer Unix timestamp parsed from the price_updated string.
# NOTE(review): time.mktime interprets the parsed struct_time as local time; if the
# trailing "z" in price_updated denotes UTC, the resulting timestamps are shifted by
# the local UTC offset — confirm.
all = [{ "price": x[0], "time": int(time.mktime(datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%fz").timetuple()))} for x in all]
# Number of records fetched.
print(len(all))

import json

# Persist the parsed records as JSON. The with-block closes the file when it
# exits, so no explicit close() call is needed.
with open("../data/RedRebel.json", "w") as f:
    json.dump(all, f)

import matplotlib.pyplot as plt
import numpy as np


# Order the records chronologically so the line plot advances in time.
all.sort(key=lambda x: x['time'])

print(all[:10])

# Split the records into parallel arrays: timestamps for x, prices for y.
x = np.array([x['time'] for x in all])
y = np.array([x['price'] for x in all])


plt.plot(x, y)

plt.show()

[{'price': 2999999, 'time': 1656638268}, {'price': 4499999, 'time': 1656688384}, {'price': 5500000, 'time': 1656707919}, {'price': 6000000, 'time': 1656728553}, {'price': 5500000, 'time': 1656797161}, {'price': 4300000, 'time': 1656810858}, {'price': 3911111, 'time': 1656824533}, {'price': 3800000, 'time': 1656838167}, {'price': 3799999, 'time': 1656852073}, {'price': 5555555, 'time': 1656864556}]

png

import pandas as pd
def mean_norm(df_input): #@save
    """Standardize each column of ``df_input`` independently.

    Every column is shifted by its own mean and divided by its own standard
    deviation (as computed by pandas ``std``); the result is returned as a
    new object and ``df_input`` is left untouched.
    """
    def _standardize(column):
        centered = column - column.mean()
        return centered / column.std()

    return df_input.apply(_standardize, axis=0)
def de_mean_norm(result, df_input:pd.DataFrame): #@save
    """Map standardized values back onto the scale of ``df_input``.

    ``result`` is multiplied by the per-column standard deviation of
    ``df_input`` and then offset by its per-column mean, i.e. the inverse of
    the transformation applied by ``mean_norm`` for the same reference data.
    """
    scale = df_input.std()
    offset = df_input.mean()
    return result * scale + offset

X = pd.DataFrame(x)
Y = pd.DataFrame(y)

print(X, Y)

              0
0    1656638268
1    1656688384
2    1656707919
3    1656728553
4    1656797161
..          ...
324  1661098048
325  1661107841
326  1661120091
327  1661131513
328  1661151899

[329 rows x 1 columns]            0
0    2999999
1    4499999
2    5500000
3    6000000
4    5500000
..       ...
324  3840000
325  3900000
326  3942000
327  3555555
328  3958888

[329 rows x 1 columns]

# Replace the raw frames with their standardized versions (X and Y are overwritten).
X = mean_norm(X)
Y = mean_norm(Y)
# NOTE: `.info` is referenced without parentheses, so this prints the bound-method
# repr (which embeds the frame contents) instead of calling DataFrame.info().
print(X.info, Y.info)

<bound method DataFrame.info of             0
0   -1.826706
1   -1.787782
2   -1.772610
3   -1.756584
4   -1.703298
..        ...
324  1.637094
325  1.644700
326  1.654214
327  1.663085
328  1.678918

[329 rows x 1 columns]> <bound method DataFrame.info of             0
0   -1.917204
1    0.956663
2    2.872577
3    3.830532
4    2.872577
..        ...
324 -0.307836
325 -0.192882
326 -0.112413
327 -0.852808
328 -0.080057

[329 rows x 1 columns]>

import seaborn as sbn

all = pd.DataFrame(all)
all.info

<bound method DataFrame.info of        price        time
0    2999999  1656638268
1    4499999  1656688384
2    5500000  1656707919
3    6000000  1656728553
4    5500000  1656797161
..       ...         ...
324  3840000  1661098048
325  3900000  1661107841
326  3942000  1661120091
327  3555555  1661131513
328  3958888  1661151899

[329 rows x 2 columns]>

sbn.scatterplot(x="time", y='price', data=all)

<AxesSubplot:xlabel='time', ylabel='price'>

png

## Regression network
import tensorflow as tf
from d2l import tensorflow as d2l

net = tf.keras.Sequential()
# Add a fully connected (Dense) layer; number of output scalars is 1 (w1x1 + w2x2 + b = y)
# NOTE(review): the layer added right below actually uses units=10, not 1.
net.add(tf.keras.layers.Dense(units=10, input_dim=1, activation=tf.keras.activations.relu))
net.add(tf.keras.layers.Dense(units=1, input_dim=10, activation=tf.keras.activations.relu))
# Random normal initialization
initializer = tf.initializers.RandomNormal(stddev=0.1)
# Get the network
# net = tf.keras.Sequential()
# Effectively initializes one layer, supplying its initial weights
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))

# Mini-batch stochastic gradient descent
# trainer = tf.keras.optimizers.SGD(learning_rate=0.0003)
# NOTE: despite the variable name, the optimizer actually used is Adam.
sgd = tf.keras.optimizers.Adam(learning_rate=0.001)
net.compile(optimizer=sgd, loss=tf.keras.losses.MeanSquaredError())

# Train on the standardized X/Y, holding out 20% of the rows for validation.
history = net.fit(X, Y, batch_size=20, epochs=300, validation_split=0.2, callbacks=[])
net.summary()

Epoch 1/300
14/14 [==============================] - 0s 12ms/step - loss: 1.0883 - val_loss: 0.5398
Epoch 2/300
14/14 [==============================] - 0s 6ms/step - loss: 1.0816 - val_loss: 0.5425

Epoch 300/300
14/14 [==============================] - 0s 6ms/step - loss: 0.6169 - val_loss: 0.3916
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_6 (Dense)             (None, 10)                20        
                                                                 
 dense_7 (Dense)             (None, 1)                 11        
                                                                 
 dense_8 (Dense)             (None, 1)                 2         
                                                                 
=================================================================
Total params: 33
Trainable params: 33
Non-trainable params: 0
_________________________________________________________________

plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.legend()
plt.show()

png

# Build the prediction inputs: 24 hourly timestamps starting at 2022-08-22 01:00
# (periods=24 with freq='H' covers one day, not three).
import datetime as dt
dateRange = pd.date_range(start=dt.datetime.strptime('2022082201', '%Y%m%d%H'), periods=24, freq='H')

# One-column frame of integer Unix timestamps, matching the layout of the training X.
testX = pd.DataFrame([int(d.timestamp()) for d in dateRange])

testX

<div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>0</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>1661130000</td> </tr> <tr> <th>1</th> <td>1661133600</td> </tr> <tr> <th>2</th> <td>1661137200</td> </tr> <tr> <th>3</th> <td>1661140800</td> </tr> <tr> <th>4</th> <td>1661144400</td> </tr> <tr> <th>5</th> <td>1661148000</td> </tr> <tr> <th>6</th> <td>1661151600</td> </tr> <tr> <th>7</th> <td>1661155200</td> </tr> <tr> <th>8</th> <td>1661158800</td> </tr> <tr> <th>9</th> <td>1661162400</td> </tr> <tr> <th>10</th> <td>1661166000</td> </tr> <tr> <th>11</th> <td>1661169600</td> </tr> <tr> <th>12</th> <td>1661173200</td> </tr> <tr> <th>13</th> <td>1661176800</td> </tr> <tr> <th>14</th> <td>1661180400</td> </tr> <tr> <th>15</th> <td>1661184000</td> </tr> <tr> <th>16</th> <td>1661187600</td> </tr> <tr> <th>17</th> <td>1661191200</td> </tr> <tr> <th>18</th> <td>1661194800</td> </tr> <tr> <th>19</th> <td>1661198400</td> </tr> <tr> <th>20</th> <td>1661202000</td> </tr> <tr> <th>21</th> <td>1661205600</td> </tr> <tr> <th>22</th> <td>1661209200</td> </tr> <tr> <th>23</th> <td>1661212800</td> </tr> </tbody> </table> </div>

# NOTE(review): mean_norm standardizes testX with testX's own mean/std, not with the
# statistics of the training timestamps, so these inputs are not on the same scale
# the network was trained on — confirm this is intended.
testX1 = mean_norm(testX)

testX1

<div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>0</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>-1.626346</td> </tr> <tr> <th>1</th> <td>-1.484924</td> </tr> <tr> <th>2</th> <td>-1.343503</td> </tr> <tr> <th>3</th> <td>-1.202082</td> </tr> <tr> <th>4</th> <td>-1.060660</td> </tr> <tr> <th>5</th> <td>-0.919239</td> </tr> <tr> <th>6</th> <td>-0.777817</td> </tr> <tr> <th>7</th> <td>-0.636396</td> </tr> <tr> <th>8</th> <td>-0.494975</td> </tr> <tr> <th>9</th> <td>-0.353553</td> </tr> <tr> <th>10</th> <td>-0.212132</td> </tr> <tr> <th>11</th> <td>-0.070711</td> </tr> <tr> <th>12</th> <td>0.070711</td> </tr> <tr> <th>13</th> <td>0.212132</td> </tr> <tr> <th>14</th> <td>0.353553</td> </tr> <tr> <th>15</th> <td>0.494975</td> </tr> <tr> <th>16</th> <td>0.636396</td> </tr> <tr> <th>17</th> <td>0.777817</td> </tr> <tr> <th>18</th> <td>0.919239</td> </tr> <tr> <th>19</th> <td>1.060660</td> </tr> <tr> <th>20</th> <td>1.202082</td> </tr> <tr> <th>21</th> <td>1.343503</td> </tr> <tr> <th>22</th> <td>1.484924</td> </tr> <tr> <th>23</th> <td>1.626346</td> </tr> </tbody> </table> </div>

testY = net.predict(testX1)

testY

array([[ 1.6376313 ],
       [ 1.4921811 ],
       [ 1.3457209 ],
       [ 0.8415806 ],
       [ 0.2825816 ],
       [-0.27449   ],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372],
       [-0.28884372]], dtype=float32)

y1 = [yyy[0] for yyy in testY]
y1

[1.6376313,
 1.4921811,
 1.3457209,
 0.8415806,
 0.2825816,
 -0.27449,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372,
 -0.28884372]

# Map each standardized prediction back to the price scale, using the mean/std
# of the raw price array `y`.
y_de = [de_mean_norm(yyy, pd.DataFrame(y)) for yyy in y1]

for n in zip(y_de, testX.values):
    # Debug output: show the type of the timestamp value.
    print(type(n[1][0]))
    # Print "price --> datetime" for each predicted point (fromtimestamp yields local time).
    print("%d --> %s" % (n[0], dt.datetime.fromtimestamp(n[1][0])))

<class 'numpy.int64'>
4855426 --> 2022-08-22 09:00:00
<class 'numpy.int64'>
4779509 --> 2022-08-22 10:00:00
<class 'numpy.int64'>
4703065 --> 2022-08-22 11:00:00
<class 'numpy.int64'>
4439932 --> 2022-08-22 12:00:00
<class 'numpy.int64'>
4148165 --> 2022-08-22 13:00:00
<class 'numpy.int64'>
3857404 --> 2022-08-22 14:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 15:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 16:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 17:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 18:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 19:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 20:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 21:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 22:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-22 23:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 00:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 01:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 02:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 03:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 04:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 05:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 06:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 07:00:00
<class 'numpy.int64'>
3849913 --> 2022-08-23 08:00:00

# Plot the de-normalized price predictions against their datetimes.
# The prices are passed as integers (the same truncation '%d' performed) instead
# of strings: matplotlib treats string data as categories, which puts the values
# on a categorical axis in order of first appearance rather than on a numeric
# price scale.
plt.plot([dt.datetime.fromtimestamp(dd[0]) for dd in testX.values], [int(de) for de in y_de])

[<matplotlib.lines.Line2D at 0x24d6f149e80>]

这个价格和目前游戏内的交易价格相差不大

png

开往

十年之约