我在一张表中新建了一列,想用 BeautifulSoup 抓取另一列中链接对应的页面,并把提取到的内容写入这个新列。但运行时我只得到错误消息。这里应该使用 INSERT 还是 UPDATE?表中总共有 10 列,但我只涉及其中两列。我该如何解决这个问题?谢谢。具体来说,我想读取 reddit_links 列中的链接,用 BeautifulSoup 解析页面,然后把结果放入新创建的 reddit_posts_2 列中。
import pymysql
from bs4 import BeautifulSoup
from selenium import webdriver
# Scrape every URL stored in Content.Reddit.reddit_links and write the page's
# visible text into the reddit_posts_2 column of the SAME row.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# has been removed from recent Selenium 4 releases.
driver = webdriver.Chrome(options=chrome_options)
try:
    mainDB_cnx = pymysql.connect(user='', password='',
                                 host='',
                                 database='Content', use_unicode=True,
                                 charset="utf8mb4")
    try:
        # The cursor context manager closes the cursor on exit. The
        # connection itself is deliberately NOT used in a `with` block inside
        # the loop: in PyMySQL 1.x leaving `with connection:` closes the
        # connection, which would break every later statement.
        with mainDB_cnx.cursor() as mainDB_cursor:
            mainDB_cursor.execute("SELECT reddit_links FROM Content.Reddit")
            # fetchall() yields one tuple per row. fetchone() would return a
            # single row, and looping over it walks that row's columns.
            rows = mainDB_cursor.fetchall()
            for row in rows:
                reddit_link = row[0]
                if not reddit_link:
                    # Nothing to load for NULL/empty links.
                    continue
                print(reddit_link)
                driver.get(reddit_link)
                html_source = driver.page_source
                soup = BeautifulSoup(html_source, 'html.parser')
                # Drop non-visible content before extracting the text.
                for script in soup(["script", "style"]):
                    script.decompose()
                reddit_posts_2 = soup.get_text()
                # UPDATE fills the new column on the existing row; INSERT
                # would create a new row with only reddit_posts_2 set.
                # Parameters go in a tuple as str -- the utf8mb4 connection
                # handles the encoding, so no manual .encode() is needed.
                # NOTE(review): if the table has a primary key, select it
                # above and match on it here instead of on the link text.
                mainDB_cursor.execute(
                    "UPDATE Content.Reddit SET reddit_posts_2 = %s "
                    "WHERE reddit_links = %s",
                    (reddit_posts_2, reddit_link))
                # Commit per row so already-scraped pages are kept if a later
                # page load or statement fails.
                mainDB_cnx.commit()
    finally:
        # Close the database connection even if scraping failed.
        mainDB_cnx.close()
finally:
    # Always shut the browser down so no Chrome process is left behind.
    driver.quit()