|
Hi. I just created a script that creates a digest of SHA-256 checksums of the files in a directory. You can control the 'base directory', the 'output directory' and the 'block size' (each file is read in chunks of at most 'block size' bytes).
It is Python 2.7-compatible code, as sys.version in FreeCAD 0.17 prints:
'2.7.14 (default, Jan 16 2018, 16:11:42) [MSC v.1800 64 bit (AMD64)]'
Hopefully it will help with the automated creation of digests of hashes of future releases. I also published it as a gist, but -- as a newly registered user -- I'm unsure whether I can paste URLs here without being automatically flagged as a spammer.
So, here's the code of sha256digest.py:
# -*- coding: utf-8 -*-
"""
Script to create SHA-256 digest of all files in the current directory
Based on an answer by Richard Neumann on Code Review
https://codereview.stackexchange.com/a/147191
Based on recommendation that file digests are created nowadays
https://en.wikipedia.org/wiki/File_verification#File_formats
Created on Fri Apr 20 12:07:41 2018
@author: rolandog
"""
from os import getcwd, listdir
from os.path import join, isfile
from time import strftime
from hashlib import sha256
def list_files(basedir=None):
    """Yield (path, name) for each file located directly inside basedir.

    basedir -- directory to scan; the current working directory when None.

    Entries are yielded in sorted name order: ``listdir`` returns names in
    arbitrary order, and a digest built from this listing should come out
    the same on every run.  Only entries for which ``isfile`` is true are
    yielded, so sub-directories are not descended into.  Any entry whose
    name contains "sha256-digest" is skipped.
    """
    if basedir is None:
        basedir = getcwd()
    for item in sorted(listdir(basedir)):
        # skip listing a hash of our hash digest
        if "sha256-digest" in item:
            continue
        path = join(basedir, item)
        if isfile(path):
            # both the full path (for reading) and the bare name (for the
            # digest line) are handed to the caller
            yield (path, item)
def sha256sum(file_name, block_size=None):
    """Return the SHA-256 checksum of a file as a hexadecimal string.

    file_name  -- path of the file to hash; it is opened in binary mode.
    block_size -- maximum number of bytes requested per read; 4096 when
                  None.  A negative value makes ``read`` return the rest of
                  the file in a single call.

    Raises ValueError when block_size is 0: a zero-byte read returns no
    data, which cannot be told apart from end-of-file, so the result would
    be the checksum of empty input whatever the file contains.
    """
    if block_size is None:
        block_size = 4096
    elif block_size == 0:
        raise ValueError("block_size must not be 0")
    checksum = sha256()
    with open(file_name, "rb") as file_handle:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty bytes object, i.e. until the end of the file
        for block in iter(lambda: file_handle.read(block_size), b""):
            checksum.update(block)
    return checksum.hexdigest()
def sha256sums(basedir=None, block_size=None):
    """Generate one (checksum, file name) pair per file found in basedir.

    basedir    -- forwarded to list_files() to select the directory.
    block_size -- forwarded to sha256sum() as the read size.

    The file name in each pair is the bare name, not the full path.
    """
    for entry in list_files(basedir=basedir):
        path, name = entry
        digest = sha256sum(path, block_size=block_size)
        yield (digest, name)
def create_sha256_digest(basedir=None, block_size=None, outputdir=None):
    """Create the timestamped sha256-digest file for a directory.

    basedir    -- directory whose files are hashed (forwarded to sha256sums).
    block_size -- read size in bytes (forwarded to sha256sums).
    outputdir  -- directory that receives the digest file; the current
                  working directory when None.

    The file is named "sha256-digest_" followed by a YYYYmmdd-HHMMSS
    timestamp and holds one "<checksum> *<file name>" line per file.
    """
    if outputdir is None:
        outputdir = getcwd()
    hash_file_name = strftime("sha256-digest_%Y%m%d-%H%M%S")
    hash_file_path = join(outputdir, hash_file_name)
    # Binary mode on purpose: in text mode Windows turns every "\n" into
    # "\r\n", and a checksum verifier may then take the carriage return for
    # part of the file name.
    with open(hash_file_path, "wb") as file_handle:
        for file_hash in sha256sums(basedir, block_size):
            line = " *".join(file_hash) + "\n"
            # Python 2 str is already bytes; text strings are encoded so
            # that the same code also runs on Python 3
            if not isinstance(line, bytes):
                line = line.encode("utf-8")
            file_handle.write(line)
if __name__ == "__main__":
    import argparse

    # All three options are optional.  An option that is left out arrives
    # as None, and create_sha256_digest() is called with that None.
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        "-d", "--directory",
        help="Path, str, to the directory of the files")
    PARSER.add_argument(
        "-b", "--blocksize",
        type=int,
        help="Block size, int, in bytes to read from files")
    PARSER.add_argument(
        "-o", "--outputdir",
        help="Output directory, str, for sha256 digest")
    ARGUMENTS = PARSER.parse_args()
    create_sha256_digest(ARGUMENTS.directory,
                         ARGUMENTS.blocksize,
                         ARGUMENTS.outputdir)
|
|
I updated my script so that it has a permissive license and can also output individual sha256 files (if needed), though it defaults to a single digest of checksums and filenames:
# -*- coding: utf-8 -*-
"""
Purpose
=======
Creates a SHA-256 digest of files in a directory
Attributions
============
Based on an answer by Richard Neumann on Code Review
https://codereview.stackexchange.com/a/147191
Based on statement that file digests are considered best-practice as of 2012
https://en.wikipedia.org/wiki/File_verification#File_formats
Creation and Modification Times
===============================
Created on Fri Apr 20 12:07:41 2018
Last Modified on Mon Apr 23 09:50:21 2018
License
=======
License: Creative Commons CC0
License URL: https://creativecommons.org/publicdomain/zero/1.0/legalcode
Changelog
=========
* 0.0.2
- Added option to request individual files instead of digest
- Don't output hash of currently running script
- Added changelog
- Added last modified date
- Added example usage by developer and end-user
- Added Creative Commons CC0 License
* 0.0.1
- Initial release.
@author: rolandog
@version: 0.0.2
"""
from os import getcwd, listdir
from os.path import join, isfile, basename
from time import strftime
from hashlib import sha256
from textwrap import dedent
def list_files(basedir=None):
    """Yield (path, name) for each file located directly inside basedir.

    basedir -- directory to scan; the current working directory when None.

    Entries are yielded in sorted name order: ``listdir`` returns names in
    arbitrary order, and the checksums built from this listing should come
    out in the same order on every run.  Only entries for which ``isfile``
    is true are yielded, so sub-directories are not descended into.

    Skipped entries:
    * names containing "sha256-digest" or ".sha256" (hash files);
    * the entry whose name equals the file name of this script.
    """
    if basedir is None:
        basedir = getcwd()
    # __file__ is not defined when the code is pasted into an interactive
    # console; there is then no script file to leave out.
    script_name = basename(globals().get("__file__", ""))
    for item in sorted(listdir(basedir)):
        # don't make a hash of a hash file
        if "sha256-digest" in item or ".sha256" in item:
            continue
        # The script is matched on its whole name.  A substring test would
        # also drop unrelated files, e.g. "data.py" for a script "a.py".
        if item == script_name:
            continue
        path = join(basedir, item)
        if isfile(path):
            # both the full path (for reading) and the bare name (for the
            # checksum line) are handed to the caller
            yield (path, item)
def sha256sum(file_name, block_size=None):
    """Return the SHA-256 checksum of a file as a hexadecimal string.

    file_name  -- path of the file to hash; it is opened in binary mode.
    block_size -- maximum number of bytes requested per read; 4096 when
                  None.  A negative value makes ``read`` return the rest of
                  the file in a single call.

    Raises ValueError when block_size is 0: a zero-byte read returns no
    data, which cannot be told apart from end-of-file, so the result would
    be the checksum of empty input whatever the file contains.
    """
    if block_size is None:
        block_size = 4096
    elif block_size == 0:
        raise ValueError("block_size must not be 0")
    checksum = sha256()
    with open(file_name, "rb") as file_handle:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty bytes object, i.e. until the end of the file
        for block in iter(lambda: file_handle.read(block_size), b""):
            checksum.update(block)
    return checksum.hexdigest()
def sha256sums(basedir=None, block_size=None):
    """Generate one (checksum, file name) pair per file found in basedir.

    basedir    -- forwarded to list_files() to select the directory.
    block_size -- forwarded to sha256sum() as the read size.

    The file name in each pair is the bare name, not the full path.
    """
    for entry in list_files(basedir=basedir):
        path, name = entry
        digest = sha256sum(path, block_size=block_size)
        yield (digest, name)
def create_sha256_digest(basedir=None,
                         block_size=None,
                         outputdir=None,
                         individual=False):
    """Create the sha256 digest file, or one hash file per input file.

    basedir    -- directory whose files are hashed (forwarded to sha256sums).
    block_size -- read size in bytes (forwarded to sha256sums).
    outputdir  -- directory that receives the output; the current working
                  directory when None.
    individual -- when true, write "<file name>.sha256" for every file;
                  otherwise write a single digest named "sha256-digest_"
                  followed by a YYYYmmdd-HHMMSS timestamp.  Any falsy value
                  selects the single digest.

    Every output line has the form "<checksum> *<file name>".
    """
    if outputdir is None:
        outputdir = getcwd()

    def encode_line(checksum, file_name):
        """Build one checksum line as bytes, ready for a binary-mode file."""
        line = " *".join((checksum, file_name)) + "\n"
        # Python 2 str is already bytes; text strings are encoded so that
        # the same code also runs on Python 3
        if not isinstance(line, bytes):
            line = line.encode("utf-8")
        return line

    # Files are written in binary mode on purpose: in text mode Windows
    # turns every "\n" into "\r\n", and a checksum verifier may then take
    # the carriage return for part of the file name.
    if individual:
        for checksum, file_name in sha256sums(basedir, block_size):
            hash_file_path = join(outputdir, file_name + ".sha256")
            with open(hash_file_path, "wb") as file_handle:
                file_handle.write(encode_line(checksum, file_name))
    else:
        hash_file_name = strftime("sha256-digest_%Y%m%d-%H%M%S")
        hash_file_path = join(outputdir, hash_file_name)
        with open(hash_file_path, "wb") as file_handle:
            for checksum, file_name in sha256sums(basedir, block_size):
                file_handle.write(encode_line(checksum, file_name))
if __name__ == "__main__":
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    DESCRIPTION = "Creates a SHA-256 digest of files in a directory"
    # Shown verbatim after the option list; RawDescriptionHelpFormatter
    # keeps the line breaks of this text as written.
    EPILOG = """\
example usage by developer
--------------------------
ls
hello-world.txt sha256digest.py
cat hello-world.txt
Hello, World!
python sha256digest.py -i
ls
hello-world.txt hello-world.txt.sha256 sha256digest.py
cat hello-world.txt.sha256
c98c24b677eff44860afea6f493bbaec5bb1c4cbb209c6fc2bbb47f66ff2ad31 *hello-world.txt
example usage by end-user
-------------------------
ls
hello-world.txt hello-world.txt.sha256
sha256sum -c hello-world.txt.sha256
hello-world.txt: OK
"""

    # All options are optional.  -i is a flag that is False unless given;
    # the others arrive as None when left out.
    PARSER = ArgumentParser(
        prog="sha256digest.py",
        description=DESCRIPTION,
        formatter_class=RawDescriptionHelpFormatter,
        epilog=dedent(EPILOG))
    PARSER.add_argument(
        "-i", "--individual",
        action="store_true",
        help="outputs one hash file per file in folder")
    PARSER.add_argument(
        "-d", "--directory",
        help="path to the folder containing the files")
    PARSER.add_argument(
        "-b", "--blocksize",
        type=int,
        help="read files in chunks less than BLOCKSIZE bytes")
    PARSER.add_argument(
        "-o", "--outputdir",
        help="output directory for sha256 digest or files")
    ARGUMENTS = PARSER.parse_args()
    create_sha256_digest(ARGUMENTS.directory,
                         ARGUMENTS.blocksize,
                         ARGUMENTS.outputdir,
                         ARGUMENTS.individual)
|