Del old files
This commit is contained in:
parent d75985651f
commit 05ecccf0da
scrape.py (67 deletions)
@@ -1,67 +0,0 @@
# Used for scraping osu! update files from osu.ppy.sh and storing them in a local database

import urllib.request, json
import MySQLdb
import MySQLdb.cursors
import time, calendar
import atexit

finished = False

with open("config.json", "r") as f:
    config = json.load(f)

with open("memory.json", "r") as f:
    memory = json.load(f)

sql = MySQLdb.connect(**config["sql"], cursorclass=MySQLdb.cursors.DictCursor)

cur = sql.cursor()

def on_close():
    # Persist scraping progress so a restart can resume where it left off
    with open("memory.json", "w") as f:
        json.dump(memory, f)
    print("Closing...")

atexit.register(on_close)

failed_streak = 0

while not finished:
    target = memory["scrape"]["last"] + 1
    attempts = 0
    completed = False
    extra_sleep = 0
    while attempts < config["scrape"]["max_attempts"] and not completed:
        try:
            with urllib.request.urlopen("https://osu.ppy.sh/web/check-updates.php?action=path&stream=stable&target={}".format(target)) as url:
                data = json.loads(url.read().decode())[0]
            # Not every release ships a patch file, so default the column to NULL
            if "url_patch" not in data.keys():
                data["url_patch"] = None
            cur.execute("INSERT INTO updates (file_version,filename,file_hash,filesize,timestamp,patch_id,url_full,url_patch) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)",
                [
                    data["file_version"],
                    data["filename"],
                    data["file_hash"],
                    data["filesize"],
                    calendar.timegm(time.strptime(data["timestamp"], "%Y-%m-%d %H:%M:%S")),
                    data["patch_id"],
                    data["url_full"],
                    data["url_patch"]
                ])
            sql.commit()
            completed = True
            failed_streak = 0
            print("target: {}, status: OK".format(target))
        except Exception:
            if target not in memory["scrape"]["failed"]:
                memory["scrape"]["failed"].append(target)
            attempts += 1
            failed_streak += 1
            if config["scrape"]["increase_delay_on_fail"]:
                extra_sleep = attempts
            print("target: {}, status: FAILED, attempt: {}".format(target, attempts))
        time.sleep(config["scrape"]["delay"] + extra_sleep)
        # Stop entirely after a long run of failures; atexit still saves memory.json
        if failed_streak > 100:
            exit()
    memory["scrape"]["last"] = target
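Note: neither config.json nor memory.json ships with this repository. Judging from the keys the script reads, a minimal bootstrap for both files would look roughly like the sketch below; every value is a placeholder assumption, not a setting recovered from the project. (The keys under "zipper" and "downloader" come from zipper.py, deleted below.)

# Hypothetical bootstrap for the two JSON files these scripts expect.
# Key names come from the code; all values are placeholders.
import json

config = {
    "sql": {                            # unpacked into MySQLdb.connect(**config["sql"])
        "host": "localhost",
        "user": "osu",
        "passwd": "secret",
        "db": "osu"
    },
    "scrape": {
        "max_attempts": 5,              # retries per target before moving on
        "delay": 1,                     # base sleep in seconds between requests
        "increase_delay_on_fail": True  # adds one extra second per failed attempt
    },
    "downloader": {
        "download_folder": "downloads"  # where zipper.py expects files to land
    }
}

memory = {
    "scrape": {
        "last": 0,     # highest target id already processed
        "failed": []   # target ids that never succeeded
    },
    "zipper": {"last": 0},
    "downloader": {"last": 0, "failed": []}
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=4)

with open("memory.json", "w") as f:
    json.dump(memory, f, indent=4)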
zipper.py (50 deletions)
@@ -1,50 +0,0 @@
# Used to zip all the downloaded files into new local folders after the downloader is done

import urllib.request, json
import MySQLdb
import MySQLdb.cursors
import os
import atexit

with open("config.json", "r") as f:
    config = json.load(f)

with open("memory.json", "r") as f:
    memory = json.load(f)

sql = MySQLdb.connect(**config["sql"], cursorclass=MySQLdb.cursors.DictCursor)

cur = sql.cursor()

def on_close():
    # Persist progress so a restart can resume where it left off
    with open("memory.json", "w") as f:
        json.dump(memory, f)
    print("Closing...")

atexit.register(on_close)

cur.execute("SELECT file_version,filename,file_hash,url_full FROM updates")
data = cur.fetchall()

# Skip rows already handled on a previous run (tracked in memory.json)
data = data[memory["zipper"]["last"]:]

# Unfinished - replace with zipper code
"""
for row in data:
    try:
        print("Downloading {} with id {}".format(row["filename"], row["file_version"]))
        urllib.request.urlretrieve(
            row["url_full"],
            os.path.join(
                config["downloader"]["download_folder"],
                row["filename"],
                "f_" + row["file_hash"]
            )
        )
        print("Done.")
    except Exception as e:
        memory["downloader"]["failed"].append(row["file_version"])
        print("Error downloading file {}: {}".format(row["file_version"], e))
    memory["downloader"]["last"] += 1
"""