import urllib.request
import json
import time
import calendar

from objects import glob

finished = True


def run():
    global finished

    if not finished:
        print("[!] Scraper job is already running!")
        return

    print("[!] Starting scraping job...")
    sql = glob.new_sql()
    cur = sql.cursor()

    # Resume from the highest file_version already stored in the database.
    cur.execute("SELECT max(file_version) f FROM updates")
    target = cur.fetchone()["f"]

    finished = False
    failed_streak = 0
    while not finished:
        target += 1
        attempts = 0
        extra_sleep = 0
        while attempts < glob.config["scrape"]["max_attempts"]:
            try:
                # Ask the official update endpoint for the build matching this target.
                with urllib.request.urlopen(
                    "https://osu.ppy.sh/web/check-updates.php?action=path&stream=stable&target=%s" % target
                ) as url:
                    data = json.loads(url.read().decode())[0]

                # Not every build ships a patch file.
                if "url_patch" not in data:
                    data["url_patch"] = None

                # Parameterised insert (the original interpolated values straight into
                # the SQL string); %s placeholders assume a MySQL-style driver.
                cur.execute(
                    "INSERT INTO updates (file_version, filename, file_hash, filesize, timestamp, patch_id, url_full, url_patch) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                    (
                        data["file_version"],
                        data["filename"],
                        data["file_hash"],
                        data["filesize"],
                        calendar.timegm(time.strptime(data["timestamp"], "%Y-%m-%d %H:%M:%S")),
                        data["patch_id"],
                        data["url_full"],
                        data["url_patch"],
                    )
                )
                sql.commit()

                failed_streak = 0
                print("[Scraper] Target: %s, Status: OK" % target)
                break
            except Exception:
                attempts += 1
                failed_streak += 1
                if glob.config["scrape"]["increase_delay_on_fail"]:
                    extra_sleep = attempts
                print("[Scraper] Target: %s, Status: FAILED, Attempt: %s" % (target, attempts))
                time.sleep(glob.config["scrape"]["delay"] + extra_sleep)

        # Stop once enough consecutive targets have failed: we have most likely
        # caught up with the newest available build.
        if failed_streak > glob.config["scrape"]["skips_until_finished"]:
            finished = True
            break

    cur.close()
    sql.close()
    print("[Scraper] Finished!")
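

# --- Example usage (sketch, not from the original project) -------------------
# The keys read above are assumed to live under glob.config["scrape"]; a config
# shaped roughly like the following would satisfy them:
#
#   "scrape": {
#       "delay": 1,                      # base seconds slept after a failed attempt
#       "max_attempts": 5,               # retries per target before skipping it
#       "increase_delay_on_fail": True,  # back off by one extra second per attempt
#       "skips_until_finished": 30       # consecutive failures before the job stops
#   }
#
# Hypothetical standalone invocation; in the real project run() is presumably
# triggered by the surrounding application or a scheduler.
if __name__ == "__main__":
    run()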