From fc64f09f0b2016c88bf93308c5e81c9509c2027d Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Mon, 30 May 2022 21:56:39 +0300 Subject: [PATCH] crypto.md5: improve performance of md5.block_generic --- vlib/crypto/md5/md5block_generic.v | 42 ++++++++++++++++-------------- vlib/encoding/binary/binary.v | 24 ++++++++--------- vlib/math/bits/bits.v | 27 ++++++++++--------- 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/vlib/crypto/md5/md5block_generic.v b/vlib/crypto/md5/md5block_generic.v index f4020a177..da7ebad16 100644 --- a/vlib/crypto/md5/md5block_generic.v +++ b/vlib/crypto/md5/md5block_generic.v @@ -9,8 +9,14 @@ module md5 import math.bits -import encoding.binary +[direct_array_access; inline] +fn get_le_u32(b []u8, start int) u32 { + return u32(b[start]) | (u32(b[1 + start]) << u32(8)) | (u32(b[2 + start]) << u32(16)) | (u32(b[ + 3 + start]) << u32(24)) +} + +[direct_array_access] fn block_generic(mut dig Digest, p []u8) { // load state mut a := dig.s[0] @@ -19,8 +25,6 @@ fn block_generic(mut dig Digest, p []u8) { mut d := dig.s[3] for i := 0; i <= p.len - block_size; i += block_size { - mut q := p[i..] 
- q = q[..block_size] // save current state aa := a bb := b @@ -28,22 +32,22 @@ fn block_generic(mut dig Digest, p []u8) { dd := d // load input block - x0 := binary.little_endian_u32(q[4 * 0x0..]) - x1 := binary.little_endian_u32(q[4 * 0x1..]) - x2 := binary.little_endian_u32(q[4 * 0x2..]) - x3 := binary.little_endian_u32(q[4 * 0x3..]) - x4 := binary.little_endian_u32(q[4 * 0x4..]) - x5 := binary.little_endian_u32(q[4 * 0x5..]) - x6 := binary.little_endian_u32(q[4 * 0x6..]) - x7 := binary.little_endian_u32(q[4 * 0x7..]) - x8 := binary.little_endian_u32(q[4 * 0x8..]) - x9 := binary.little_endian_u32(q[4 * 0x9..]) - xa := binary.little_endian_u32(q[4 * 0xa..]) - xb := binary.little_endian_u32(q[4 * 0xb..]) - xc := binary.little_endian_u32(q[4 * 0xc..]) - xd := binary.little_endian_u32(q[4 * 0xd..]) - xe := binary.little_endian_u32(q[4 * 0xe..]) - xf := binary.little_endian_u32(q[4 * 0xf..]) + x0 := get_le_u32(p, 4 * 0x0 + i) + x1 := get_le_u32(p, 4 * 0x1 + i) + x2 := get_le_u32(p, 4 * 0x2 + i) + x3 := get_le_u32(p, 4 * 0x3 + i) + x4 := get_le_u32(p, 4 * 0x4 + i) + x5 := get_le_u32(p, 4 * 0x5 + i) + x6 := get_le_u32(p, 4 * 0x6 + i) + x7 := get_le_u32(p, 4 * 0x7 + i) + x8 := get_le_u32(p, 4 * 0x8 + i) + x9 := get_le_u32(p, 4 * 0x9 + i) + xa := get_le_u32(p, 4 * 0xa + i) + xb := get_le_u32(p, 4 * 0xb + i) + xc := get_le_u32(p, 4 * 0xc + i) + xd := get_le_u32(p, 4 * 0xd + i) + xe := get_le_u32(p, 4 * 0xe + i) + xf := get_le_u32(p, 4 * 0xf + i) // round 1 a = b + bits.rotate_left_32((((c ^ d) & b) ^ d) + a + x0 + u32(0xd76aa478), 7) diff --git a/vlib/encoding/binary/binary.v b/vlib/encoding/binary/binary.v index 1aa8c2eef..5f5478ee1 100644 --- a/vlib/encoding/binary/binary.v +++ b/vlib/encoding/binary/binary.v @@ -4,26 +4,26 @@ module binary // Little Endian -[inline] +[direct_array_access; inline] pub fn little_endian_u16(b []u8) u16 { _ = b[1] // bounds check return u16(b[0]) | (u16(b[1]) << u16(8)) } -[inline] +[direct_array_access; inline] pub fn 
little_endian_put_u16(mut b []u8, v u16) { _ = b[1] // bounds check b[0] = u8(v) b[1] = u8(v >> u16(8)) } -[inline] +[direct_array_access; inline] pub fn little_endian_u32(b []u8) u32 { _ = b[3] // bounds check return u32(b[0]) | (u32(b[1]) << u32(8)) | (u32(b[2]) << u32(16)) | (u32(b[3]) << u32(24)) } -[inline] +[direct_array_access; inline] pub fn little_endian_put_u32(mut b []u8, v u32) { _ = b[3] // bounds check b[0] = u8(v) @@ -32,13 +32,13 @@ pub fn little_endian_put_u32(mut b []u8, v u32) { b[3] = u8(v >> u32(24)) } -[inline] +[direct_array_access; inline] pub fn little_endian_u64(b []u8) u64 { _ = b[7] // bounds check return u64(b[0]) | (u64(b[1]) << u64(8)) | (u64(b[2]) << u64(16)) | (u64(b[3]) << u64(24)) | (u64(b[4]) << u64(32)) | (u64(b[5]) << u64(40)) | (u64(b[6]) << u64(48)) | (u64(b[7]) << u64(56)) } -[inline] +[direct_array_access; inline] pub fn little_endian_put_u64(mut b []u8, v u64) { _ = b[7] // bounds check b[0] = u8(v) @@ -52,26 +52,26 @@ pub fn little_endian_put_u64(mut b []u8, v u64) { } // Big Endian -[inline] +[direct_array_access; inline] pub fn big_endian_u16(b []u8) u16 { _ = b[1] // bounds check return u16(b[1]) | (u16(b[0]) << u16(8)) } -[inline] +[direct_array_access; inline] pub fn big_endian_put_u16(mut b []u8, v u16) { _ = b[1] // bounds check b[0] = u8(v >> u16(8)) b[1] = u8(v) } -[inline] +[direct_array_access; inline] pub fn big_endian_u32(b []u8) u32 { _ = b[3] // bounds check return u32(b[3]) | (u32(b[2]) << u32(8)) | (u32(b[1]) << u32(16)) | (u32(b[0]) << u32(24)) } -[inline] +[direct_array_access; inline] pub fn big_endian_put_u32(mut b []u8, v u32) { _ = b[3] // bounds check b[0] = u8(v >> u32(24)) @@ -80,13 +80,13 @@ pub fn big_endian_put_u32(mut b []u8, v u32) { b[3] = u8(v) } -[inline] +[direct_array_access; inline] pub fn big_endian_u64(b []u8) u64 { _ = b[7] // bounds check return u64(b[7]) | (u64(b[6]) << u64(8)) | (u64(b[5]) << u64(16)) | (u64(b[4]) << u64(24)) | (u64(b[3]) << u64(32)) | (u64(b[2]) << u64(40)) | 
(u64(b[1]) << u64(48)) | (u64(b[0]) << u64(56)) } -[inline] +[direct_array_access; inline] pub fn big_endian_put_u64(mut b []u8, v u64) { _ = b[7] // bounds check b[0] = u8(v >> u64(56)) diff --git a/vlib/math/bits/bits.v b/vlib/math/bits/bits.v index b75e2e549..65753b0e1 100644 --- a/vlib/math/bits/bits.v +++ b/vlib/math/bits/bits.v @@ -139,6 +139,13 @@ pub fn ones_count_64(x u64) int { return int(y) & ((1 << 7) - 1) } +const ( + n8 = u8(8) + n16 = u16(16) + n32 = u32(32) + n64 = u64(64) +) + // --- RotateLeft --- // rotate_left_8 returns the value of x rotated left by (k mod 8) bits. // To rotate x right by k bits, call rotate_left_8(x, -k). @@ -146,9 +153,8 @@ pub fn ones_count_64(x u64) int { // This function's execution time does not depend on the inputs. [inline] pub fn rotate_left_8(x u8, k int) u8 { - n := u8(8) - s := u8(k) & (n - u8(1)) - return (x << s) | (x >> (n - s)) + s := u8(k) & (bits.n8 - u8(1)) + return (x << s) | (x >> (bits.n8 - s)) } // rotate_left_16 returns the value of x rotated left by (k mod 16) bits. @@ -157,9 +163,8 @@ pub fn rotate_left_8(x u8, k int) u8 { // This function's execution time does not depend on the inputs. [inline] pub fn rotate_left_16(x u16, k int) u16 { - n := u16(16) - s := u16(k) & (n - u16(1)) - return (x << s) | (x >> (n - s)) + s := u16(k) & (bits.n16 - u16(1)) + return (x << s) | (x >> (bits.n16 - s)) } // rotate_left_32 returns the value of x rotated left by (k mod 32) bits. @@ -168,9 +173,8 @@ pub fn rotate_left_16(x u16, k int) u16 { // This function's execution time does not depend on the inputs. [inline] pub fn rotate_left_32(x u32, k int) u32 { - n := u32(32) - s := u32(k) & (n - u32(1)) - return (x << s) | (x >> (n - s)) + s := u32(k) & (bits.n32 - u32(1)) + return (x << s) | (x >> (bits.n32 - s)) } // rotate_left_64 returns the value of x rotated left by (k mod 64) bits. @@ -179,9 +183,8 @@ pub fn rotate_left_32(x u32, k int) u32 { // This function's execution time does not depend on the inputs. 
[inline] pub fn rotate_left_64(x u64, k int) u64 { - n := u64(64) - s := u64(k) & (n - u64(1)) - return (x << s) | (x >> (n - s)) + s := u64(k) & (bits.n64 - u64(1)) + return (x << s) | (x >> (bits.n64 - s)) } // --- Reverse --- -- 2.30.2