Added downloader script
This commit is contained in:
parent
1617b60ea6
commit
b0f3b10c34
|
@ -16,5 +16,8 @@
|
||||||
"delay": 1,
|
"delay": 1,
|
||||||
"max_attempts": 3,
|
"max_attempts": 3,
|
||||||
"increase_delay_on_fail": true
|
"increase_delay_on_fail": true
|
||||||
}
|
},
|
||||||
|
"downloader": {
|
||||||
|
"download_folder": "/home/wayback/files"
|
||||||
|
}
|
||||||
}
|
}
|
47
downloader.py
Normal file
47
downloader.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
# Downloads all the files into local folders; run this after the scrape data has been saved.
|
||||||
|
|
||||||
|
import urllib.request, json
|
||||||
|
import MySQLdb
|
||||||
|
import MySQLdb.cursors
|
||||||
|
import os
|
||||||
|
import atexit
|
||||||
|
|
||||||
|
# Read the static settings (SQL connection options, download folder).
with open("config.json", "r") as config_file:
    config = json.load(config_file)

# Read the progress state that is written back to memory.json on exit.
with open("memory.json", "r") as memory_file:
    memory = json.load(memory_file)

# Open the database connection. DictCursor makes every fetched row a dict
# keyed by column name, which is how the rows are accessed further down.
sql = MySQLdb.connect(
    cursorclass=MySQLdb.cursors.DictCursor,
    **config["sql"]
)
cur = sql.cursor()
|
||||||
|
|
||||||
|
def on_close():
    """Write the current progress state back to memory.json and announce shutdown."""
    state_file = open("memory.json", "w")
    try:
        # Serialise the module-level ``memory`` dict as-is.
        json.dump(memory, state_file)
    finally:
        state_file.close()
    print("Closing...")
|
||||||
|
|
||||||
|
# Persist progress to memory.json when the interpreter exits.
atexit.register(on_close)

cur.execute("SELECT file_version,filename,file_hash,url_full FROM updates")
data = cur.fetchall()

# Remove already downloaded files (checked from memory.json): "last" holds the
# number of rows processed by earlier runs, so skip that many rows.
# NOTE(review): this offset only makes sense if the query returns rows in the
# same order on every run; the query has no ORDER BY -- confirm.
data = data[memory["downloader"]["last"]:]

for row in data:
    try:
        print("Downloading {} with id {}".format(row["filename"], row["file_version"]))
        # Each file is stored as <download_folder>/<filename>/f_<file_hash>.
        target_dir = os.path.join(
            config["downloader"]["download_folder"],
            row["filename"]
        )
        # urlretrieve does not create missing directories, so make sure the
        # per-file folder exists before downloading into it.
        os.makedirs(target_dir, exist_ok=True)
        urllib.request.urlretrieve(
            row["url_full"],
            os.path.join(target_dir, "f_" + row["file_hash"])
        )
        print("Done.")
    except Exception as e:
        # Best effort: remember the failed row and carry on with the next one.
        memory["downloader"]["failed"].append(row["file_version"])
        print("Error downloading file {}: {}".format(row["file_version"], e))
    # Count the row as processed whether it succeeded or failed, so the next
    # run resumes after it (failures are tracked separately in "failed").
    memory["downloader"]["last"] += 1
|
11
memory.json
11
memory.json
|
@ -1 +1,10 @@
|
||||||
{"scrape": {"last": 0, "failed": []}}
|
{
|
||||||
|
"scrape": {
|
||||||
|
"last": 0,
|
||||||
|
"failed": []
|
||||||
|
},
|
||||||
|
"downloader": {
|
||||||
|
"last": 0,
|
||||||
|
"failed": []
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user