67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
|
# Scrapes osu! update-file metadata from osu.ppy.sh and stores it in our own MySQL database.
|
||
|
|
||
|
import urllib.request, json
|
||
|
import MySQLdb
|
||
|
import MySQLdb.cursors
|
||
|
import time
|
||
|
import atexit
|
||
|
|
||
|
# Controls the main scrape loop; nothing in this file ever sets it to True.
finished = False

# Static settings: the "sql" section is forwarded to MySQLdb.connect and the
# "scrape" section is read by the scrape loop.
with open("config.json") as f:
    config = json.loads(f.read())

# Mutable scrape state (last processed target, failed targets); it is written
# back to the same file by on_close().
with open("memory.json") as f:
    memory = json.loads(f.read())

# One connection and one dict-returning cursor are shared by the whole script.
sql = MySQLdb.connect(cursorclass=MySQLdb.cursors.DictCursor, **config["sql"])
cur = sql.cursor()
|
||
|
|
||
|
@atexit.register
def on_close():
    """Write the scrape state back to memory.json and announce shutdown.

    Registered as an atexit handler, so it runs when the interpreter exits
    normally (including via SystemExit).
    """
    with open("memory.json", "w") as state_file:
        json.dump(memory, state_file)
    print("Closing...")
|
||
|
|
||
|
# check-updates endpoint queried once per target id on the stable stream.
UPDATE_URL = "https://osu.ppy.sh/web/check-updates.php?action=path&stream=stable&target={}"

# Columns of the `updates` table, in the order the INSERT binds them. Each
# name is also the key read from the decoded response entry.
UPDATE_COLUMNS = (
    "file_version",
    "filename",
    "file_hash",
    "filesize",
    "timestamp",
    "patch_id",
    "url_full",
    "url_patch",
)

INSERT_UPDATE = "INSERT INTO updates ({}) VALUES ({})".format(
    ",".join(UPDATE_COLUMNS),
    ",".join(["%s"] * len(UPDATE_COLUMNS)),
)

# Number of consecutive failed attempts (across targets); any successful
# insert resets it, and more than 10 in a row stops the script.
failed_streak = 0

while not finished:
    # Continue with the id right after the last one that was processed.
    target = memory["scrape"]["last"] + 1
    attempts = 0
    completed = False
    extra_sleep = 0
    while attempts < config["scrape"]["max_attempts"] and not completed:
        try:
            # A timeout keeps a stalled connection from hanging the scraper
            # forever; "timeout" is an optional config key (seconds).
            with urllib.request.urlopen(
                UPDATE_URL.format(target),
                timeout=config["scrape"].get("timeout", 30),
            ) as response:
                # Only the first entry of the returned JSON array is stored.
                data = json.loads(response.read().decode())[0]
            # Entries without a patch URL are stored with NULL in that column.
            data.setdefault("url_patch", None)
            cur.execute(INSERT_UPDATE, [data[column] for column in UPDATE_COLUMNS])
            # MySQLdb connections start with autocommit disabled, so the row
            # must be committed explicitly or it is lost when the script ends.
            sql.commit()
        except Exception as exc:
            # `Exception` (not a bare except) so that Ctrl-C and SystemExit
            # still stop the script instead of counting as a failed attempt.
            # NOTE(review): the target is recorded on its first failed attempt
            # and stays listed even if a later retry succeeds - confirm that
            # this is the intended meaning of the "failed" list.
            if target not in memory["scrape"]["failed"]:
                memory["scrape"]["failed"].append(target)
            attempts += 1
            failed_streak += 1
            if config["scrape"]["increase_delay_on_fail"]:
                # Back off one extra second per failed attempt on this target.
                extra_sleep = attempts
            print("target: {}, status: FAILED, attempt: {}, error: {!r}".format(target, attempts, exc))
        else:
            completed = True
            failed_streak = 0
            print("target: {}, status: OK".format(target))
        # Pause after every request, successful or not.
        time.sleep(config["scrape"]["delay"] + extra_sleep)
        if failed_streak > 10:
            # Too many consecutive failures: stop without advancing "last" so
            # the next run retries this target. The builtin exception is raised
            # directly because `exit()` is only provided by the `site` module;
            # atexit handlers still run.
            raise SystemExit
    # Advance even when every attempt failed, so one bad id cannot block the run.
    memory["scrape"]["last"] = target
|