from selenium import webdriver #导入模块,selenium导入浏览器驱动,用get方法打开浏览器
import time
import re
import csv #存储数据
from lxml import etree #数据的解析
import pandas as pd
import numpy as np
option = webdriver.ChromeOptions() #网址获取
option.add_argument('headless') #无界面启动,即设置浏览器静默
driver = webdriver.Chrome(options=option)
driver.get('http://data.eastmoney.com/zjlx/000040.html')
table=driver.find_element(By.ID,'table_ls')
# data_heads1=table.find_elements(By.TAG_NAME,'th')[:8]
# data_heads2=table.find_elements(By.TAG_NAME,'th')[8:]
data_bodies = table.find_element(By.TAG_NAME,'tbody').find_elements(By.TAG_NAME,'tr')
datas=[]
for data_body in data_bodies:
data3=data_body.text.split()
datas.append(data3)
datas
name=['日期','收盘价','涨跌幅','主力净流入净额','主力净流入净占比','超大单净流入净额','超大单净流入净占比',
'大单净流入净额','大单净流入净占比','中单净流入净额','中单净流入净占比','小单净流入净额','小单净流入净占比']
data= pd.DataFrame(columns=name,data=datas,dtype=np.float64)
data
from sqlalchemy import create_engine
import pymysql
conn= pymysql.connect(host='localhost',port=3306,user='root',passwd='root',
db='mysql',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
engine=create_engine('mysql+pymysql://root:root@localhost:3306/mysql?charset=utf8mb4')
df=pd.DataFrame(data)
df.日期=pd.to_datetime(df.日期,format='%Y-%m-%d')
with engine.begin()as conn:
df.to_sql(name='economic',con=conn,if_exists='replace',index=False)
conn.close()
疑问:我的变量类型尝试了很多方法去转换,但是始终存到MySQL里面的时候其他变量都是text类型,且无法显示具体数字;而且就算在jupyter里面也是无论怎么转换类型,结果输出来我的变量类型都是object(日期跟收盘价除外,但是日期的类型导致我在SQL里面会显示时间,我只想有日期不想有时间该怎么弄啊)
上述问题已解决,需要对MySQL进行utf8编码,完整代码如下:
from selenium import webdriver #导入模块,selenium导入浏览器驱动,用get方法打开浏览器
import time
import re
import csv #存储数据
from lxml import etree #数据的解析
import pandas as pd
import numpy as np
from selenium.webdriver.common.by import By
option = webdriver.ChromeOptions() #网址获取
# option.add_argument('headless') #无界面启动,即设置浏览器静默
# driver = webdriver.Chrome(options=option)
driver = webdriver.Chrome()
driver.get('http://data.eastmoney.com/zjlx/000040.html')
type(driver)
table=driver.find_element(By.ID,"table_ls")
table.text
# data_heads1=table.find_elements(By.TAG_NAME,'th')[:8]
# data_heads2=table.find_elements(By.TAG_NAME,'th')[8:]
data_bodies = table.find_element(By.TAG_NAME,'tbody').find_elements(By.TAG_NAME,'tr')
datas=[]
for data_body in data_bodies:
data3=data_body.text.split()
datas.append(data3)
datas
name=['日期','收盘价','涨跌幅','主力净流入净额','主力净流入净占比','超大单净流入净额','超大单净流入净占比',
'大单净流入净额','大单净流入净占比','中单净流入净额','中单净流入净占比','小单净流入净额','小单净流入净占比']
data= pd.DataFrame(columns=name,data=datas,dtype=np.float64)
data
#以下操作准备连接mysql
import sqlalchemy
from sqlalchemy import create_engine
import pymysql
conn= pymysql.connect(host='localhost',port=3306,user='root',passwd='root',
db='mysql',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
engine=create_engine('mysql+pymysql://root:root@localhost:3306/mysql?charset=utf8mb4')
df=pd.DataFrame(data)
df.日期=pd.to_datetime(df.日期,format='%Y-%m-%d')
with engine.begin()as conn:
df.to_sql(name='economic', con=conn,if_exists='replace',index=False,
dtype={'收盘价': sqlalchemy.types.String(length=20),
'涨跌幅': sqlalchemy.types.String(length=20),
'主力净流入净额': sqlalchemy.types.String(length=20),
'主力净流入净占比': sqlalchemy.types.String(length=20),
'超大单净流入净额': sqlalchemy.types.String(length=20),
'超大单净流入净占比': sqlalchemy.types.String(length=20),
'大单净流入净额': sqlalchemy.types.String(length=20),
'大单净流入净占比': sqlalchemy.types.String(length=20),
'中单净流入净额': sqlalchemy.types.String(length=20),
'中单净流入净占比': sqlalchemy.types.String(length=20),
'小单净流入净额': sqlalchemy.types.String(length=20),
'小单净流入净占比': sqlalchemy.types.String(length=20),
})
conn.close()