Question:
I have been happily using the Always On feature on Replit for years, and now I have been told I need to use a deployment instead. I don’t have a web app or a complicated bot; it’s just a simple program that is supposed to run every 60 minutes.
The deployment options are confusing. I tried one for a similar program and it just ended up as a black box: no console to check the print statements, and no up-to-date CSV file visible!
Please show me how to use the deployment as a simple replacement for this very simple ‘always on’ program.
Thanks for your help!
Repl link:
https://replit.com/@KatharinaNi/webscraping-bot-always-on
import lxml
import csv
import datetime as dt
import time
from random import randint, seed
from urllib.request import Request, urlopen
import bs4
import requests
'''simplified version of my always-on program:
1. program runs every 60 minutes
2. program reads and writes a csv file
3. I need to easily access the console to check the print statements from time to time
'''
# Blog page whose <article> teasers are scraped.
BLOG_URL = "https://www.thechinastory.org/blog/"
# CSV file that remembers every article link that has already been seen.
ARTICLE_DB = 'article_db.csv'
# Pause after one scraping pass before the program ends.
WAITING_MINUTES = 60
# Give up on the HTTP request after this many seconds instead of hanging.
REQUEST_TIMEOUT_SECONDS = 30


def fetch_article_links(url=BLOG_URL):
    """Return the first link of every <article> element on the page at *url*.

    Articles without an <a> tag, or whose <a> has no (or an empty) href,
    are skipped instead of crashing the run.

    Raises:
        requests.RequestException: if the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
    """
    response = requests.get(
        url,
        headers={'User-agent': 'Mozilla/5.0'},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Without this an error page would be parsed as "no articles".
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')

    links = []
    for teaser in soup.find_all("article"):
        anchor = teaser.find('a')
        href = anchor.get('href') if anchor is not None else None
        if href:
            links.append(href)
    return links


def load_recorded_links(path=ARTICLE_DB):
    """Return the set of all cell values stored in the CSV file at *path*.

    Every cell is collected (not only the first column) so that a link is
    recognised whichever column it was stored in. A missing file is treated
    as an empty database, which makes the very first run work.
    """
    try:
        with open(path, newline='', encoding='utf-8') as db_file:
            return {cell for row in csv.reader(db_file) for cell in row}
    except FileNotFoundError:
        return set()


def select_new_links(scraped, recorded):
    """Return the links in *scraped* that are not in *recorded*.

    Links are compared as whole strings; a substring test would wrongly
    treat a link as known when it is merely a prefix of a recorded one.
    Page order is kept and links repeated on the page are returned once.
    """
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return [link for link in dict.fromkeys(scraped) if link not in recorded]


def append_links(links, timestamp, path=ARTICLE_DB):
    """Append one ``link,timestamp`` row per link to the CSV file at *path*."""
    # newline='' lets the csv module control line endings itself.
    with open(path, mode='a', newline='', encoding='utf-8') as db_file:
        writer = csv.writer(db_file, quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows([link, timestamp] for link in links)


def main():
    """Run one scraping pass, record new article links, then wait."""
    date_time = dt.datetime.now()
    try:
        # scrape website and get all article links
        articles = fetch_article_links()
    except requests.RequestException as error:
        # A network problem should be visible in the console, not a traceback
        # that hides the fact that the run simply could not reach the site.
        print(date_time, 'scraping failed:', error)
    else:
        # keep only the links that are not yet stored in article_db.csv
        article_list = select_new_links(articles, load_recorded_links())
        # save all new article links in article_db.csv and print to console
        if article_list:
            print(date_time, len(article_list), ' new articles found')
            append_links(article_list, date_time)
        else:
            print(date_time, 'no new articles found')

    # wait 60 minutes until scraping starts again
    print(f'wait for {WAITING_MINUTES} minutes, {date_time}')
    time.sleep(WAITING_MINUTES * 60)


if __name__ == "__main__":
    main()