import urllib.request
import json
import time
import calendar

from objects import glob

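# True while no job is running; guards against starting a second scrape concurrently.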
finished = True


def run():
    global finished

    if not finished:
        print("[!] Scraper job is already running!")
        return

    print("[!] Starting scraping job...")
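    # glob.new_sql() is expected to return a DB-API connection whose cursor
    # yields dict rows (see fetchone()["f"] below).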
    sql = glob.new_sql()
    cur = sql.cursor()

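    # Resume from the highest build number already stored in the database.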
cur.execute("SELECT max(file_version) f FROM updates")
|
|
target = cur.fetchone()["f"]
|
|
|
|
    finished = False
    failed_streak = 0
    while not finished:
        target += 1

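        # Retry each target a few times before counting it as missing.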
        attempts = 0
        extra_sleep = 0
        while attempts < glob.config["scrape"]["max_attempts"]:
            try:
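                # Ask osu!'s check-updates endpoint for the file this build number maps to.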
                with urllib.request.urlopen("https://osu.ppy.sh/web/check-updates.php?action=path&stream=stable&target=%s" % target) as url:
                    data = json.loads(url.read().decode())[0]
                    # Not every build ships a binary patch.
                    if "url_patch" not in data:
                        data["url_patch"] = None
cur.execute("INSERT INTO updates (file_version,filename,file_hash,filesize,timestamp,patch_id,url_full,url_patch) VALUES ('%s','%s','%s','%s',%s,'%s','%s','%s')" %
|
|
(
|
|
data["file_version"],
|
|
data["filename"],
|
|
data["file_hash"],
|
|
data["filesize"],
|
|
calendar.timegm(time.strptime(data["timestamp"], "%Y-%m-%d %H:%M:%S")),
|
|
data["patch_id"],
|
|
data["url_full"],
|
|
data["url_patch"],
|
|
))
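                    # Commit per row so an interrupted run keeps everything scraped so far.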
                    sql.commit()
                    failed_streak = 0
                    print("[Scraper] Target: %s, Status: OK" % target)
                    break
            except Exception:
                attempts += 1
                failed_streak += 1
                if glob.config["scrape"]["increase_delay_on_fail"]:
                    # Linear backoff: one extra second per failed attempt.
                    extra_sleep = attempts
                print("[Scraper] Target: %s, Status: FAILED, Attempt: %s" % (target, attempts))

            time.sleep(glob.config["scrape"]["delay"] + extra_sleep)

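        # Enough consecutive misses means we've walked past the newest build.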
        if failed_streak > glob.config["scrape"]["skips_until_finished"]:
            finished = True
            break

    cur.close()
    sql.close()
    print("[Scraper] Finished!")