Added downloader script
This commit is contained in:
parent
1617b60ea6
commit
b0f3b10c34
|
@ -16,5 +16,8 @@
|
|||
"delay": 1,
|
||||
"max_attempts": 3,
|
||||
"increase_delay_on_fail": true
|
||||
},
|
||||
"downloader": {
|
||||
"download_folder": "/home/wayback/files"
|
||||
}
|
||||
}
|
47
downloader.py
Normal file
47
downloader.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Downloads all the files into local folders; run it after the scrape data has been saved.
|
||||
|
||||
import urllib.request, json
|
||||
import MySQLdb
|
||||
import MySQLdb.cursors
|
||||
import os
|
||||
import atexit
|
||||
|
||||
# Static settings; this script reads the "sql" and "downloader" sections.
with open("config.json", "r") as f:
    config = json.load(f)

# Progress state shared between runs; rewritten on exit by on_close().
with open("memory.json", "r") as f:
    memory = json.load(f)

# A memory.json written before the downloader existed only holds the
# "scrape" section. Seed the downloader section with defaults so the
# lookups further down do not raise KeyError on such a file.
_downloader_state = memory.setdefault("downloader", {})
_downloader_state.setdefault("last", 0)
_downloader_state.setdefault("failed", [])

# DictCursor makes every fetched row a dict keyed by column name.
sql = MySQLdb.connect(**config["sql"], cursorclass=MySQLdb.cursors.DictCursor)

cur = sql.cursor()
|
||||
|
||||
def on_close():
    """Write the current progress state back to memory.json and announce shutdown.

    Takes no arguments and returns nothing; it serialises the module-level
    ``memory`` dict as JSON, overwriting the previous file contents.
    """
    with open("memory.json", "w") as state_file:
        json.dump(memory, state_file)
    print("Closing...")
|
||||
|
||||
# Save the progress state when the interpreter terminates, so an
# interrupted run can pick up where it stopped.
atexit.register(on_close)

# NOTE(review): the query has no ORDER BY, so the row order (and therefore
# the offset-based resume below) is only as stable as the server's default
# ordering -- confirm which column should define the order before adding one.
cur.execute("SELECT file_version,filename,file_hash,url_full FROM updates")
data = cur.fetchall()

# Every row is in memory now; release the database resources before the
# (potentially long) download phase instead of holding them until exit.
cur.close()
sql.close()

# Remove already downloaded files (checked from memory.json)
data = data[memory["downloader"]["last"]:]

for row in data:
    try:
        print("Downloading {} with id {}".format(row["filename"], row["file_version"]))
        # Each file gets its own folder named after the file; the payload is
        # stored inside it as "f_<hash>". Create the folder on demand, since
        # urlretrieve does not create missing parent directories.
        target_dir = os.path.join(
            config["downloader"]["download_folder"],
            row["filename"],
        )
        os.makedirs(target_dir, exist_ok=True)
        urllib.request.urlretrieve(
            row["url_full"],
            os.path.join(target_dir, "f_" + row["file_hash"]),
        )
        print("Done.")
    except Exception as e:
        # Best effort: remember the failed id and carry on with the next row.
        memory["downloader"]["failed"].append(row["file_version"])
        print("Error downloading file {}: {}".format(row["file_version"], e))
    # Count the row as processed whether it succeeded or failed; failures
    # are tracked separately in the "failed" list above.
    memory["downloader"]["last"] += 1
|
11
memory.json
11
memory.json
|
@ -1 +1,10 @@
|
|||
{"scrape": {"last": 0, "failed": []}}
|
||||
{
|
||||
"scrape": {
|
||||
"last": 0,
|
||||
"failed": []
|
||||
},
|
||||
"downloader": {
|
||||
"last": 0,
|
||||
"failed": []
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user