我已经在Haskell中编写了以下代码来计算sha256。我发现代码很优雅,但是在GHC下,它在shaStep中花费了很多时间,而且,如果我正确地读取了分析数据,则将花费大量时间进行内存分配。鉴于实际上可以在没有内存分配的情况下计算sha256,我正在寻找有关如何找出正在做的分配并将其压缩的提示。

我的代码:

{-# OPTIONS_GHC -funbox-strict-fields #-}
module SHA256 (sha256, sha256Ascii, Hash8) where

import Data.Word
import Data.Bits
import Data.List
import Control.Monad (ap)

ch x y z = (x .&. y) `xor` (complement x .&. z)
maj x y z = (x .&. y) `xor` (x .&. z) `xor` (y .&. z)

bigSigma0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
bigSigma1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
smallSigma0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
smallSigma1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
ks = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
     ,0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
     ,0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
     ,0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
     ,0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
     ,0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
     ,0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
     ,0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]

blockSize = 16

padding :: Int -> [Word8] -> [[Word32]]
padding blockSize x = unfoldr block $ paddingHelper x 0 (0::Int) (0::Integer)
 where
  block [] = Nothing
  block x = Just $ splitAt blockSize x
  paddingHelper x o on n | on == (bitSize o) = o:paddingHelper x 0 0 n
  paddingHelper (x:xs) o on n | on < (bitSize o) =
    paddingHelper xs ((shiftL o bs) .|. (fromIntegral x)) (on+bs) $! (n+fromIntegral bs)
   where
    bs = bitSize x
  paddingHelper [] o on n = (shiftL (shiftL o 1 .|. 1) (bso-on-1)):
                                (zeros ((-(fromIntegral n-on+3*bso)) `mod` (blockSize*bso)))
                                [fromIntegral (shiftR n bso), fromIntegral n]
       where
        bso = bitSize o
        zeros 0 = id
        zeros n | 0 < n = let z=0 in (z:) . (zeros (n-bitSize z))

data Hash8 = Hash8 {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32
                   {-# UNPACK #-} !Word32  deriving (Eq, Ord, Show)

shaStep :: Hash8 -> [Word32] -> Hash8
shaStep h m = foldl' (flip id) h (zipWith mkStep3 ks ws) `plus` h
 where
  ws = m++zipWith4 smallSigma (drop (blockSize-2) ws) (drop (blockSize-7) ws)
                              (drop (blockSize-15) ws) (drop (blockSize-16) ws)
   where
    smallSigma a b c d = smallSigma1 a + b + smallSigma0 c + d
  mkStep3 k w (Hash8 a b c d e f g h) = Hash8 (t1+t2) a b c (d+t1) e f g
   where
    t1 = h + bigSigma1 e + ch e f g + k + w
    t2 = bigSigma0 a + maj a b c
  (Hash8 x0 x1 x2 x3 x4 x5 x6 x7) `plus` (Hash8 y0 y1 y2 y3 y4 y5 y6 y7) =
    Hash8 (x0+y0) (x1+y1) (x2+y2) (x3+y3) (x4+y4) (x5+y5) (x6+y6) (x7+y7)

sha :: Hash8 -> [Word8] -> Hash8
sha h0 x = foldl' shaStep h0 $ padding blockSize x

sha256 :: [Word8] -> Hash8
sha256 = sha $
  Hash8 0x6a09e667 0xbb67ae85 0x3c6ef372 0xa54ff53a 0x510e527f 0x9b05688c 0x1f83d9ab 0x5be0cd19

sha256Ascii :: String -> Hash8
sha256Ascii = sha256 . map (fromIntegral . fromEnum)

编辑:我刚刚注意到,向chmaj和大和小sigma添加专门的类型签名对我的分析结果有很大的影响(而根本不影响未分析的程序)。因此,看来我的程序在shaStep上花费的时间没有我最初相信的那么多。

最佳答案

鉴于我到目前为止收到的评论(感谢大家!),这是shaStep的某种改进版本:

data Buffer = Buffer {-# UNPACK #-} !Hash8
                     {-# UNPACK #-} !Hash8

shaStep :: Hash8 -> Buffer -> Hash8
shaStep h m = go ks m h `plus` h
 where
  go [] _ h  = h
  go (k:ks) (Buffer (Hash8 a0 a1 a2 a3 a4 a5 a6 a7) (Hash8 a8 a9 aa ab ac ad ae af)) h =
    go ks (Buffer (Hash8 a1 a2 a3 a4 a5 a6 a7 a8) (Hash8 a9 aa ab ac ad ae af ag)) h'
   where
    h' = mkStep3 k a0 h
    ag = smallSigma ae a9 a1 a0
    smallSigma a b c d = smallSigma1 a + b + smallSigma0 c + d
    mkStep3 k w (Hash8 a b c d e f g h) = Hash8 (t1+t2) a b c (d+t1) e f g
     where
      t1 = h + bigSigma1 e + ch e f g + k + w
      t2 = bigSigma0 a + maj a b c
  (Hash8 x0 x1 x2 x3 x4 x5 x6 x7) `plus` (Hash8 y0 y1 y2 y3 y4 y5 y6 y7) =
    Hash8 (x0+y0) (x1+y1) (x2+y2) (x3+y3) (x4+y4) (x5+y5) (x6+y6) (x7+y7)

它不像原始代码那么好,因为我必须手工明确地保留16个Word32的缓冲区,但是我想这是可以的。也许还能做得更好?

需要修改block中的padding函数以返回Buffer的列表
  block [] = Nothing
  block (a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:as) =
    Just (Buffer (Hash8 a0 a1 a2 a3 a4 a5 a6 a7) (Hash8 a8 a9 aa ab ac ad ae af), as)

09-26 08:47