I wrote this code:
import datetime

import MySQLdb
import pandas as pd

df = pd.DataFrame(maindatatable)
now = datetime.date.today()
df['date'] = now
#df.rows = header
df.to_csv('output.csv', sep=';', encoding='latin-1', index=True)

connection = MySQLdb.connect(host='localhost',
                             user='root',
                             passwd='1234',
                             db='database')
cursor = connection.cursor()

# 'ignore 1 lines' skips the CSV header row.
query = """ load data local infile 'C:/Python27/output.csv'
            into table valami
            character set latin1
            fields terminated by ';'
            lines terminated by '\n'
            ignore 1 lines;
        """
cursor.execute(query)
connection.commit()
cursor.close()
I web-scrape this table from the internet every day, and I want to import it into SQL. If I run this code every day, how can I make the new values go into the SQL table (append?)?
The code that produces output.csv:
import csv
import urllib2

from bs4 import BeautifulSoup

filename = r'output.csv'
resultcsv = open(filename, "wb")
# Note: in Python 2 csv.writer does not accept an encoding argument;
# it writes byte strings, so the cells are encoded below.
output = csv.writer(resultcsv, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
header = ['Pénznem', 'Devizanév', 'Egység', 'Pénznem_Forintban', 'date']
output.writerow(header)

def make_soup(url):
    thepage = urllib2.urlopen(url)
    soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata

def to_2d(l, n):
    # Split the flat cell list into rows of n columns each.
    return [l[i:i+n] for i in range(0, len(l), n)]

soup = make_soup("https://www.mnb.hu/arfolyamok")
datatable = []
for record in soup.findAll('tr'):
    for data in record.findAll('td'):
        # Encode the unicode cell text so Python 2's csv module can write it.
        datatable.append(data.text.encode('latin-1'))

maindatatable = to_2d(datatable, 4)
output.writerows(maindatatable)
resultcsv.close()
Answer 0 (score: 1)
I tried to solve this problem; take a look at my SQLAlchemy solution. You can create the database and table in the mysql shell, and after that you can run the Python code (my version is Python 3 on Ubuntu).
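For reference, a minimal sketch of that mysql-shell step, assuming the names match the connection string in the code below (df.to_sql creates newtable itself, so strictly only the database has to exist beforehand):

CREATE DATABASE dbase CHARACTER SET utf8;
-- On older MySQL versions this also creates the 'you' account used in the engine URL.
GRANT ALL PRIVILEGES ON dbase.* TO 'you'@'localhost' IDENTIFIED BY 'viktororban';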
import csv
import datetime
import urllib.request

import MySQLdb
import pandas as pd
from bs4 import BeautifulSoup
from sqlalchemy import create_engine

filename = 'output.csv'
resultcsv = open(filename, "w")
output = csv.writer(resultcsv, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
header = ['Pénznem', 'Devizanév', 'Egység', 'Pénznem_Forintban', 'date']
output.writerow(header)

with urllib.request.urlopen("https://www.mnb.hu/arfolyamok") as url:
    s = url.read()
soup = BeautifulSoup(s, 'html.parser')

def to_2d(l, n):
    # Split the flat cell list into rows of n columns each.
    return [l[i:i+n] for i in range(0, len(l), n)]

datatable = []
for record in soup.findAll('tr'):
    for data in record.findAll('td'):
        datatable.append(data.text)

maindatatable = to_2d(datatable, 4)
output.writerows(maindatatable)
resultcsv.close()

# Name the four scraped columns and add today's date, as in the question's code.
df = pd.DataFrame(maindatatable, columns=header[:4])
df['date'] = datetime.date.today()
print(df)

engine = create_engine("mysql+mysqldb://you:" + 'viktororban' + "@localhost/dbase")
df.to_sql(con=engine, name='newtable', if_exists='fail', index=True)
Of course, you can change the passwd!
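One note on the daily-append part of the question: LOAD DATA LOCAL INFILE (the original approach) always appends rows to the existing table, and with SQLAlchemy the same effect comes from if_exists. As written above, if_exists='fail' raises a ValueError on the second run because newtable already exists. A minimal sketch of the appending variant, with the same engine and table name as above (index=False keeps the auto-generated 0..N index from repeating every day):

# Append today's rows instead of failing once 'newtable' exists.
df.to_sql(con=engine, name='newtable', if_exists='append', index=False)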