sha1.py 6.3 KB
Newer Older
P
piyush-kgp 已提交
1 2 3
"""
Demonstrates implementation of SHA1 Hash function in a Python class and gives utilities
to find hash of string or hash of text from a file.

Usage: python sha1.py --string "Hello World!!"
       python sha1.py --file "hello_world.txt"
       When run without any arguments, it prints the hash of the string
       "Hello World!! Welcome to Cryptography"

Also contains a Test class to verify that the generated hash is the same as that
returned by the hashlib library.

SHA1 hash or SHA1 sum of a string is a cryptographic function which means it is easy
to calculate forwards but extremely difficult to calculate backwards. What this means
is, you can easily calculate the hash of a string, but it is extremely difficult to
know the original string if you have its hash. This property is useful to communicate
securely, send encrypted messages and is very useful in payment systems, blockchain
and cryptocurrency etc.

The Algorithm as described in the reference:
First we start with a message. The message is padded and the length of the message
is added to the end. It is then split into blocks of 512 bits or 64 bytes. The blocks
are then processed one at a time. Each block must be expanded and compressed.
The value after each compression is added to a 160-bit buffer called the current hash
state. After the last block is processed the current hash state is returned as
the final hash.

Reference: https://deadhacker.com/2006/02/21/sha-1-illustrated/
"""
import argparse
W
William Zhang 已提交
27
import hashlib  # hashlib is only used inside the Test class
28
import struct
P
piyush-kgp 已提交
29
import unittest
P
piyush-kgp 已提交
30 31 32


class SHA1Hash:
    """
    Class to contain the entire pipeline for SHA1 Hashing Algorithm.

    The message is padded, split into 64-byte blocks, and every block is expanded
    to 80 words and compressed into a 160-bit running state.

    >>> SHA1Hash(bytes('Allan', 'utf-8')).final_hash()
    '872af2d8ac3d8695387e7c804bf0e02c18df9e6e'
    >>> SHA1Hash(b'').final_hash()
    'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    >>> message = b'abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq'
    >>> SHA1Hash(message).final_hash()
    '84983e441c3bd26ebaae4aa1f95129e5e54670f1'
    """

    # Initial values of the five 32-bit state words the digest starts from.
    _INITIAL_STATE = (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0)

    def __init__(self, data):
        """
        Initiates the variables data and h. data is the bytestring to hash. h is a
        list of 5 8-digit Hexadecimal numbers corresponding to
        (1732584193, 4023233417, 2562383102, 271733878, 3285377520)
        respectively. We will start with this as a message digest. 0x is how you write
        Hexadecimal numbers in Python
        """
        self.data = data
        self.h = list(self._INITIAL_STATE)

    @staticmethod
    def rotate(n, b):
        """
        Static method to be used inside other methods. Left rotates the 32-bit
        value n by b bits.

        >>> SHA1Hash.rotate(12, 2)
        48
        """
        return ((n << b) | (n >> (32 - b))) & 0xFFFFFFFF

    def padding(self):
        """
        Pads the input message so that the length of padded_data is a multiple of
        64 bytes (512 bits): a single 0x80 byte, then zero bytes, then the message
        length in bits as a big-endian 64-bit integer.
        """
        padding = b"\x80" + b"\x00" * (63 - (len(self.data) + 8) % 64)
        padded_data = self.data + padding + struct.pack(">Q", 8 * len(self.data))
        return padded_data

    def split_blocks(self):
        """
        Returns a list of bytestrings each of length 64, cut from self.padded_data
        (which final_hash sets before calling this method).
        """
        return [
            self.padded_data[i : i + 64] for i in range(0, len(self.padded_data), 64)
        ]

    def expand_block(self, block):
        """
        Takes a bytestring-block of length 64, unpacks it to a list of integers and
        returns a list of 80 integers after some bit operations
        """
        w = list(struct.unpack(">16L", block)) + [0] * 64
        for i in range(16, 80):
            w[i] = self.rotate((w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]), 1)
        return w

    def final_hash(self):
        """
        Calls all the other methods to process the input. Pads the data, then splits
        into blocks and then does a series of operations for each block (including
        expansion).

        For each block, the current state is copied to a,b,c,d,e and these 5
        variables undergo 80 rounds of changes. After EACH block they are pairwise
        added (modulo 2**32) to the state, ie a to word 0, b to word 1 and so on, so
        that the next block starts from the updated state.

        The computation always starts from the standard initial state, so calling
        this method repeatedly on one instance returns the same digest. The final
        state is stored in self.h as a tuple.

        Returns the digest as a 40-character lowercase hexadecimal string.
        """
        self.padded_data = self.padding()
        self.blocks = self.split_blocks()
        state = list(self._INITIAL_STATE)
        for block in self.blocks:
            expanded_block = self.expand_block(block)
            a, b, c, d, e = state
            for i in range(80):
                if i < 20:
                    f = (b & c) | ((~b) & d)
                    k = 0x5A827999
                elif i < 40:
                    f = b ^ c ^ d
                    k = 0x6ED9EBA1
                elif i < 60:
                    f = (b & c) | (b & d) | (c & d)
                    k = 0x8F1BBCDC
                else:
                    f = b ^ c ^ d
                    k = 0xCA62C1D6
                a, b, c, d, e = (
                    (self.rotate(a, 5) + f + e + k + expanded_block[i]) & 0xFFFFFFFF,
                    a,
                    self.rotate(b, 30),
                    c,
                    d,
                )
            # Fold this block's result into the running state before the next block;
            # doing this only once after the loop would discard all but the last block.
            state = [
                (word + delta) & 0xFFFFFFFF
                for word, delta in zip(state, (a, b, c, d, e))
            ]
        self.h = tuple(state)
        return "%08x%08x%08x%08x%08x" % self.h
P
piyush-kgp 已提交
129 130 131


class SHA1HashTest(unittest.TestCase):
    """
    Test class for the SHA1Hash class. Inherits the TestCase class from unittest
    """

    def testMatchHashes(self):
        """
        Checks that SHA1Hash agrees with hashlib.sha1 for messages whose lengths
        fall on both sides of the 56-byte padding limit and the 64-byte block size,
        so that single-block and multi-block inputs are both exercised.
        """
        messages = [
            b"",
            bytes("Test String", "utf-8"),
            b"a" * 55,
            b"a" * 56,
            b"a" * 64,
            b"a" * 200,
        ]
        for msg in messages:
            # subTest reports the failing length instead of stopping at the first one
            with self.subTest(length=len(msg)):
                self.assertEqual(
                    SHA1Hash(msg).final_hash(), hashlib.sha1(msg).hexdigest()
                )
P
piyush-kgp 已提交
139 140 141


def main(argv=None):
    """
    Provides option 'string' or 'file' to take input and prints the calculated SHA1
    hash.

    argv is an optional list of command-line arguments; when it is None (the
    default) argparse reads them from sys.argv. If a file is given its raw bytes
    are hashed, otherwise the string (or its default) is hashed as UTF-8.
    """
    parser = argparse.ArgumentParser(description="Process some strings or files")
    parser.add_argument(
        "--string",
        dest="input_string",
        default="Hello World!! Welcome to Cryptography",
        help="Hash the string",
    )
    parser.add_argument("--file", dest="input_file", help="Hash contents of a file")
    args = parser.parse_args(argv)
    # In any case hash input should be a bytestring
    if args.input_file:
        with open(args.input_file, "rb") as f:
            hash_input = f.read()
    else:
        hash_input = bytes(args.input_string, "utf-8")
    print(SHA1Hash(hash_input).final_hash())
P
piyush-kgp 已提交
165

166

W
William Zhang 已提交
167
if __name__ == "__main__":
    # Print the requested hash first, then run the doctests embedded in this module.
    main()
    import doctest

    doctest.testmod()