diff --git a/include/aarch64_label.h b/include/aarch64_label.h
index a4e6d06..25dc8aa 100644
--- a/include/aarch64_label.h
+++ b/include/aarch64_label.h
@@ -3,16 +3,16 @@
 
 #ifdef __USER_LABEL_PREFIX__
 #define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#define CONCAT2(a, b) a##b
+#define cdecl(x)      CONCAT1(__USER_LABEL_PREFIX__, x)
 #else
 #define cdecl(x) x
 #endif
 
 #ifdef __APPLE__
-#define ASM_DEF_RODATA .section	__TEXT,__const
+#define ASM_DEF_RODATA .section __TEXT,__const
 #else
-#define ASM_DEF_RODATA .section .rodata
+#define ASM_DEF_RODATA .section .rodata
 #endif
 
 #endif
diff --git a/include/crc.h b/include/crc.h
index 0714960..e2c9902 100644
--- a/include/crc.h
+++ b/include/crc.h
@@ -27,13 +27,11 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **********************************************************************/
 
-
 /**
  *  @file  crc.h
  *  @brief CRC functions.
  */
 
-
 #ifndef _CRC_H_
 #define _CRC_H_
 
@@ -43,7 +41,6 @@
 extern "C" {
 #endif
 
-
 /* Multi-binary functions */
 
 /**
@@ -54,12 +51,11 @@ extern "C" {
  *
  * @returns 16 bit CRC
  */
-uint16_t crc16_t10dif(
-	uint16_t init_crc,        //!< initial CRC value, 16 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
-
+uint16_t
+crc16_t10dif(uint16_t init_crc,        //!< initial CRC value, 16 bits
+             const unsigned char *buf, //!< buffer to calculate CRC on
+             uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC and copy T10 standard, runs appropriate version.
@@ -68,13 +64,12 @@ uint16_t crc16_t10dif(
  *
  * @returns 16 bit CRC
  */
-uint16_t crc16_t10dif_copy(
-	uint16_t init_crc,  //!< initial CRC value, 16 bits
-	uint8_t *dst,       //!< buffer destination for copy
-	uint8_t *src,       //!< buffer source to crc + copy
-	uint64_t len        //!< buffer length in bytes (64-bit data)
-	);
-
+uint16_t
+crc16_t10dif_copy(uint16_t init_crc, //!< initial CRC value, 16 bits
+                  uint8_t *dst,      //!< buffer destination for copy
+                  uint8_t *src,      //!< buffer source to crc + copy
+                  uint64_t len       //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from the IEEE standard, runs appropriate version.
@@ -96,11 +91,11 @@ uint16_t crc16_t10dif_copy(
  * @returns 32 bit CRC
  */
 
-uint32_t crc32_ieee(
-	uint32_t init_crc,        //!< initial CRC value, 32 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint32_t
+crc32_ieee(uint32_t init_crc,        //!< initial CRC value, 32 bits
+           const unsigned char *buf, //!< buffer to calculate CRC on
+           uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate the customized CRC
@@ -124,12 +119,11 @@ uint32_t crc32_ieee(
  *
  * @returns 32 bit CRC
  */
-uint32_t crc32_gzip_refl(
-	uint32_t init_crc,          //!< initial CRC value, 32 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len                //!< buffer length in bytes (64-bit data)
-	);
-
+uint32_t
+crc32_gzip_refl(uint32_t init_crc,        //!< initial CRC value, 32 bits
+                const unsigned char *buf, //!< buffer to calculate CRC on
+                uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief ISCSI CRC function, runs appropriate version.
@@ -139,12 +133,11 @@ uint32_t crc32_gzip_refl(
  *
  * @returns 32 bit CRC
  */
-unsigned int crc32_iscsi(
-	unsigned char *buffer, //!< buffer to calculate CRC on
-	int len,               //!< buffer length in bytes
-	unsigned int init_crc  //!< initial CRC value
-	);
-
+unsigned int
+crc32_iscsi(unsigned char *buffer, //!< buffer to calculate CRC on
+            int len,               //!< buffer length in bytes
+            unsigned int init_crc  //!< initial CRC value
+);
 
 /* Base functions */
 
@@ -152,45 +145,42 @@ unsigned int crc32_iscsi(
  * @brief ISCSI CRC function, baseline version
  * @returns 32 bit CRC
  */
-unsigned int crc32_iscsi_base(
-	unsigned char *buffer,	//!< buffer to calculate CRC on
-	int len, 		//!< buffer length in bytes
-	unsigned int crc_init	//!< initial CRC value
-	);
-
+unsigned int
+crc32_iscsi_base(unsigned char *buffer, //!< buffer to calculate CRC on
+                 int len,               //!< buffer length in bytes
+                 unsigned int crc_init  //!< initial CRC value
+);
 
 /**
  * @brief Generate CRC from the T10 standard, runs baseline version
  * @returns 16 bit CRC
  */
-uint16_t crc16_t10dif_base(
-	uint16_t seed,	//!< initial CRC value, 16 bits
-	uint8_t *buf,	//!< buffer to calculate CRC on
-	uint64_t len 	//!< buffer length in bytes (64-bit data)
-	);
-
+uint16_t
+crc16_t10dif_base(uint16_t seed, //!< initial CRC value, 16 bits
+                  uint8_t *buf,  //!< buffer to calculate CRC on
+                  uint64_t len   //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC and copy T10 standard, runs baseline version.
  * @returns 16 bit CRC
  */
-uint16_t crc16_t10dif_copy_base(
-	uint16_t init_crc,  //!< initial CRC value, 16 bits
-	uint8_t *dst,       //!< buffer destination for copy
-	uint8_t *src,       //!< buffer source to crc + copy
-	uint64_t len        //!< buffer length in bytes (64-bit data)
-	);
-
+uint16_t
+crc16_t10dif_copy_base(uint16_t init_crc, //!< initial CRC value, 16 bits
+                       uint8_t *dst,      //!< buffer destination for copy
+                       uint8_t *src,      //!< buffer source to crc + copy
+                       uint64_t len       //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from the IEEE standard, runs baseline version
  * @returns 32 bit CRC
  */
-uint32_t crc32_ieee_base(
-	uint32_t seed, 	//!< initial CRC value, 32 bits
-	uint8_t *buf,	//!< buffer to calculate CRC on
-	uint64_t len 	//!< buffer length in bytes (64-bit data)
-	);
+uint32_t
+crc32_ieee_base(uint32_t seed, //!< initial CRC value, 32 bits
+                uint8_t *buf,  //!< buffer to calculate CRC on
+                uint64_t len   //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate the customized CRC
@@ -198,12 +188,11 @@ uint32_t crc32_ieee_base(
  * runs baseline version
  * @returns 32 bit CRC
  */
-uint32_t crc32_gzip_refl_base(
-	uint32_t seed,	//!< initial CRC value, 32 bits
-	uint8_t *buf,	//!< buffer to calculate CRC on
-	uint64_t len	//!< buffer length in bytes (64-bit data)
-	);
-
+uint32_t
+crc32_gzip_refl_base(uint32_t seed, //!< initial CRC value, 32 bits
+                     uint8_t *buf,  //!< buffer to calculate CRC on
+                     uint64_t len   //!< buffer length in bytes (64-bit data)
+);
 
 #ifdef __cplusplus
 }
diff --git a/include/crc64.h b/include/crc64.h
index c4a5697..717f5b4 100644
--- a/include/crc64.h
+++ b/include/crc64.h
@@ -27,13 +27,11 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **********************************************************************/
 
-
 /**
  *  @file  crc64.h
  *  @brief CRC64 functions.
  */
 
-
 #ifndef _CRC64_H_
 #define _CRC64_H_
 
@@ -43,7 +41,6 @@
 extern "C" {
 #endif
 
-
 /* Multi-binary functions */
 
 /**
@@ -54,11 +51,11 @@ extern "C" {
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_ecma_refl(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_refl(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                const unsigned char *buf, //!< buffer to calculate CRC on
+                uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ECMA-182 standard in normal format, runs
@@ -68,11 +65,11 @@ uint64_t crc64_ecma_refl(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_ecma_norm(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_norm(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                const unsigned char *buf, //!< buffer to calculate CRC on
+                uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in reflected format, runs
@@ -82,11 +79,11 @@ uint64_t crc64_ecma_norm(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_iso_refl(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_refl(uint64_t init_crc,        //!< initial CRC value, 64 bits
+               const unsigned char *buf, //!< buffer to calculate CRC on
+               uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in normal format, runs
@@ -96,11 +93,11 @@ uint64_t crc64_iso_refl(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_iso_norm(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_norm(uint64_t init_crc,        //!< initial CRC value, 64 bits
+               const unsigned char *buf, //!< buffer to calculate CRC on
+               uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in reflected format, runs
@@ -110,11 +107,11 @@ uint64_t crc64_iso_norm(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_jones_refl(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_refl(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                 const unsigned char *buf, //!< buffer to calculate CRC on
+                 uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in normal format, runs
@@ -124,11 +121,11 @@ uint64_t crc64_jones_refl(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_jones_norm(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_norm(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                 const unsigned char *buf, //!< buffer to calculate CRC on
+                 uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in reflected format, runs
@@ -138,11 +135,11 @@ uint64_t crc64_jones_norm(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_rocksoft_refl(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_refl(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in normal format, runs
@@ -152,11 +149,11 @@ uint64_t crc64_rocksoft_refl(
  * selects the appropriate version at runtime.
  * @returns 64 bit CRC
  */
-uint64_t crc64_rocksoft_norm(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_norm(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /* Arch specific versions */
 
@@ -167,11 +164,11 @@ uint64_t crc64_rocksoft_norm(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_ecma_refl_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_refl_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ECMA-182 standard in normal format.
@@ -180,31 +177,31 @@ uint64_t crc64_ecma_refl_by8(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_ecma_norm_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_norm_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ECMA-182 standard in reflected format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_ecma_refl_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_refl_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                     const unsigned char *buf, //!< buffer to calculate CRC on
+                     uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ECMA-182 standard in normal format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_ecma_norm_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_ecma_norm_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                     const unsigned char *buf, //!< buffer to calculate CRC on
+                     uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in reflected format.
@@ -213,11 +210,11 @@ uint64_t crc64_ecma_norm_base(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_iso_refl_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_refl_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                   const unsigned char *buf, //!< buffer to calculate CRC on
+                   uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in normal format.
@@ -226,31 +223,31 @@ uint64_t crc64_iso_refl_by8(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_iso_norm_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_norm_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                   const unsigned char *buf, //!< buffer to calculate CRC on
+                   uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in reflected format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_iso_refl_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_refl_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from ISO standard in normal format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_iso_norm_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_iso_norm_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                    const unsigned char *buf, //!< buffer to calculate CRC on
+                    uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in reflected format.
@@ -259,11 +256,11 @@ uint64_t crc64_iso_norm_base(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_jones_refl_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_refl_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                     const unsigned char *buf, //!< buffer to calculate CRC on
+                     uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in normal format.
@@ -272,31 +269,31 @@ uint64_t crc64_jones_refl_by8(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_jones_norm_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_norm_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                     const unsigned char *buf, //!< buffer to calculate CRC on
+                     uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in reflected format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_jones_refl_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_refl_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                      const unsigned char *buf, //!< buffer to calculate CRC on
+                      uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Jones" coefficients in normal format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_jones_norm_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_jones_norm_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                      const unsigned char *buf, //!< buffer to calculate CRC on
+                      uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in reflected format.
@@ -305,21 +302,21 @@ uint64_t crc64_jones_norm_base(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_rocksoft_refl_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_refl_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                        const unsigned char *buf, //!< buffer to calculate CRC on
+                        uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in reflected format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_rocksoft_refl_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_refl_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                         const unsigned char *buf, //!< buffer to calculate CRC on
+                         uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in normal format.
@@ -328,21 +325,21 @@ uint64_t crc64_rocksoft_refl_base(
  * @returns 64 bit CRC
  */
 
-uint64_t crc64_rocksoft_norm_by8(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_norm_by8(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                        const unsigned char *buf, //!< buffer to calculate CRC on
+                        uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 /**
  * @brief Generate CRC from "Rocksoft" coefficients in normal format, runs baseline version
  * @returns 64 bit CRC
  */
-uint64_t crc64_rocksoft_norm_base(
-	uint64_t init_crc,        //!< initial CRC value, 64 bits
-	const unsigned char *buf, //!< buffer to calculate CRC on
-	uint64_t len              //!< buffer length in bytes (64-bit data)
-	);
+uint64_t
+crc64_rocksoft_norm_base(uint64_t init_crc,        //!< initial CRC value, 64 bits
+                         const unsigned char *buf, //!< buffer to calculate CRC on
+                         uint64_t len              //!< buffer length in bytes (64-bit data)
+);
 
 #ifdef __cplusplus
 }
diff --git a/include/erasure_code.h b/include/erasure_code.h
index 3db109d..6a142a7 100644
--- a/include/erasure_code.h
+++ b/include/erasure_code.h
@@ -27,7 +27,6 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **********************************************************************/
 
-
 #ifndef _ERASURE_CODE_H_
 #define _ERASURE_CODE_H_
 
@@ -71,7 +70,8 @@ extern "C" {
  * @returns none
  */
 
-void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
+void
+ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls);
 
 /**
  * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version.
@@ -79,7 +79,8 @@ void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
  * Baseline version of ec_encode_data() with same parameters.
  */
 
-void ec_init_tables_base(int k, int rows, unsigned char* a, unsigned char* gftbls);
+void
+ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls);
 
 /**
  * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
@@ -103,19 +104,22 @@ void ec_init_tables_base(int k, int rows, unsigned char* a, unsigned char* gftbl
  * @returns none
  */
 
-void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-		    unsigned char **coding);
+void
+ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+               unsigned char **coding);
 
 /**
  * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
  *
  * Baseline version of ec_encode_data() with same parameters.
  */
-void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
-			 unsigned char **dest);
+void
+ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
+                    unsigned char **dest);
 
 /**
- * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
+ * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate
+ * version.
  *
  * Given one source data block, update one or multiple blocks of encoded data as
  * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
@@ -136,8 +140,9 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne
  * @param coding Array of pointers to coded output buffers.
  * @returns none
  */
-void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
-			   unsigned char *data, unsigned char **coding);
+void
+ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+                      unsigned char *data, unsigned char **coding);
 
 /**
  * @brief Generate update for encode or decode of erasure codes from single source.
@@ -145,8 +150,9 @@ void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g
  * Baseline version of ec_encode_data_update().
  */
 
-void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
-				unsigned char *data, unsigned char **dest);
+void
+ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+                           unsigned char *data, unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product, runs baseline version.
@@ -168,9 +174,9 @@ void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned ch
  * @returns none
  */
 
-
-void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
-                        unsigned char **src, unsigned char *dest);
+void
+gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector dot product, runs appropriate version.
@@ -192,8 +198,9 @@ void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
-                        unsigned char **src, unsigned char *dest);
+void
+gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                 unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
@@ -218,8 +225,9 @@ void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		 unsigned char *dest);
+void
+gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+            unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector multiply accumulate, baseline version.
@@ -227,8 +235,9 @@ void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned ch
  * Baseline version of gf_vect_mad() with same parameters.
  */
 
-void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
-		      unsigned char *dest);
+void
+gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
+                 unsigned char *dest);
 
 // x86 only
 #if defined(__i386__) || defined(__x86_64__)
@@ -239,8 +248,9 @@ void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned ch
  * Arch specific version of ec_encode_data() with same parameters.
  * @requires SSE4.1
  */
-void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			unsigned char **coding);
+void
+ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+                   unsigned char **coding);
 
 /**
  * @brief Generate or decode erasure codes on blocks of data.
@@ -248,8 +258,9 @@ void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigne
  * Arch specific version of ec_encode_data() with same parameters.
  * @requires AVX
  */
-void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			unsigned char **coding);
+void
+ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+                   unsigned char **coding);
 
 /**
  * @brief Generate or decode erasure codes on blocks of data.
@@ -257,8 +268,9 @@ void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigne
  * Arch specific version of ec_encode_data() with same parameters.
  * @requires AVX2
  */
-void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			 unsigned char **coding);
+void
+ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+                    unsigned char **coding);
 
 /**
  * @brief Generate update for encode or decode of erasure codes from single source.
@@ -267,8 +279,9 @@ void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsign
  * @requires SSE4.1
  */
 
-void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
-			       unsigned char *data, unsigned char **coding);
+void
+ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+                          unsigned char *data, unsigned char **coding);
 
 /**
  * @brief Generate update for encode or decode of erasure codes from single source.
@@ -277,8 +290,9 @@ void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned cha
  * @requires AVX
  */
 
-void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
-			       unsigned char *data, unsigned char **coding);
+void
+ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+                          unsigned char *data, unsigned char **coding);
 
 /**
  * @brief Generate update for encode or decode of erasure codes from single source.
@@ -287,8 +301,9 @@ void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned cha
  * @requires AVX2
  */
 
-void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
-				unsigned char *data, unsigned char **coding);
+void
+ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+                           unsigned char *data, unsigned char **coding);
 
 /**
  * @brief GF(2^8) vector dot product.
@@ -308,8 +323,9 @@ void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned ch
  * @returns none
  */
 
-void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char *dest);
+void
+gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                     unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector dot product.
@@ -329,8 +345,9 @@ void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char *dest);
+void
+gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                     unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector dot product.
@@ -350,8 +367,9 @@ void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char *dest);
+void
+gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector dot product with two outputs.
@@ -372,8 +390,9 @@ void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with two outputs.
@@ -394,8 +413,9 @@ void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with two outputs.
@@ -416,8 +436,9 @@ void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                       unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with three outputs.
@@ -438,8 +459,9 @@ void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with three outputs.
@@ -460,8 +482,9 @@ void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with three outputs.
@@ -482,8 +505,9 @@ void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                       unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with four outputs.
@@ -504,8 +528,9 @@ void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with four outputs.
@@ -526,8 +551,9 @@ void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with four outputs.
@@ -548,8 +574,9 @@ void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                       unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with five outputs.
@@ -570,8 +597,9 @@ void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with five outputs.
@@ -592,8 +620,9 @@ void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with five outputs.
@@ -614,8 +643,9 @@ void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                       unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with six outputs.
@@ -636,8 +666,9 @@ void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with six outputs.
@@ -658,8 +689,9 @@ void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                      unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector dot product with six outputs.
@@ -680,8 +712,9 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  * @returns none
  */
 
-void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
-			unsigned char **src, unsigned char **dest);
+void
+gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                       unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply accumulate, arch specific version.
@@ -690,8 +723,9 @@ void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  * @requires SSE4.1
  */
 
-void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		     unsigned char *dest);
+void
+gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                unsigned char *dest);
 /**
  * @brief GF(2^8) vector multiply accumulate, arch specific version.
  *
@@ -699,8 +733,9 @@ void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
  * @requires AVX
  */
 
-void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		     unsigned char *dest);
+void
+gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector multiply accumulate, arch specific version.
@@ -709,9 +744,9 @@ void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
  * @requires AVX2
  */
 
-void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char *dest);
-
+void
+gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char *dest);
 
 /**
  * @brief GF(2^8) vector multiply with 2 accumulate.  SSE version.
@@ -734,21 +769,24 @@ void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsign
  * @returns none
  */
 
-void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
  * @requires AVX
  */
-void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 /**
  * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
  * @requires AVX2
  */
-void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		       unsigned char **dest);
+void
+gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                  unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
@@ -771,22 +809,25 @@ void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig
  * @returns none
  */
 
-void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
  * @requires AVX
  */
-void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
  * @requires AVX2
  */
-void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		       unsigned char **dest);
+void
+gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                  unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
@@ -809,61 +850,70 @@ void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig
  * @returns none
  */
 
-void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
  * @requires AVX
  */
-void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 /**
  * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
  * @requires AVX2
  */
-void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		       unsigned char **dest);
+void
+gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                  unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
  * @requires SSE4.1
  */
-void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
  * @requires AVX
  */
-void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 /**
  * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
  * @requires AVX2
  */
-void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		       unsigned char **dest);
+void
+gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                  unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
  * @requires SSE4.1
  */
-void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 /**
  * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
  * @requires AVX
  */
-void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		      unsigned char **dest);
+void
+gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                 unsigned char **dest);
 
 /**
  * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
  * @requires AVX2
  */
-void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		       unsigned char **dest);
+void
+gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                  unsigned char **dest);
 
 #endif
 
@@ -879,7 +929,8 @@ void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig
  * @returns  Product of a and b in GF(2^8)
  */
 
-unsigned char gf_mul(unsigned char a, unsigned char b);
+unsigned char
+gf_mul(unsigned char a, unsigned char b);
 
 /**
  * @brief Single element GF(2^8) inverse.
@@ -888,7 +939,8 @@ unsigned char gf_mul(unsigned char a, unsigned char b);
  * @returns  Field element b such that a x b = {1}
  */
 
-unsigned char gf_inv(unsigned char a);
+unsigned char
+gf_inv(unsigned char a);
 
 /**
  * @brief Generate a matrix of coefficients to be used for encoding.
@@ -914,7 +966,8 @@ unsigned char gf_inv(unsigned char a);
  * @returns  none
  */
 
-void gf_gen_rs_matrix(unsigned char *a, int m, int k);
+void
+gf_gen_rs_matrix(unsigned char *a, int m, int k);
 
 /**
  * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
@@ -929,7 +982,8 @@ void gf_gen_rs_matrix(unsigned char *a, int m, int k);
  * @returns  none
  */
 
-void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
+void
+gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
 
 /**
  * @brief Invert a matrix in GF(2^8)
@@ -943,8 +997,8 @@ void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
  * @returns 0 successful, other fail on singular input matrix
  */
 
-int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
-
+int
+gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
 
 /*************************************************************/
 
diff --git a/include/gf_vect_mul.h b/include/gf_vect_mul.h
index 7cd9544..c8ffbd6 100644
--- a/include/gf_vect_mul.h
+++ b/include/gf_vect_mul.h
@@ -27,7 +27,6 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **********************************************************************/
 
-
 #ifndef _GF_VECT_MUL_H
 #define _GF_VECT_MUL_H
 
@@ -46,7 +45,7 @@ extern "C" {
 // x86 only
 #if defined(__i386__) || defined(__x86_64__)
 
- /**
+/**
  * @brief GF(2^8) vector multiply by constant.
  *
  * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
@@ -64,10 +63,10 @@ extern "C" {
  * @returns 0 pass, other fail
  */
 
-int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
+int
+gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
 
-
- /**
+/**
  * @brief GF(2^8) vector multiply by constant.
  *
  * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
@@ -85,7 +84,8 @@ int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
  * @returns 0 pass, other fail
  */
 
-int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
+int
+gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
 
 #endif
 
@@ -109,8 +109,8 @@ int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
  * @returns 0 pass, other fail
  */
 
-int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
-
+int
+gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
 
 /**
  * @brief Initialize 32-byte constant array for GF(2^8) vector multiply
@@ -122,8 +122,8 @@ int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
  * @param gftbl Table output.
  */
 
-void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
-
+void
+gf_vect_mul_init(unsigned char c, unsigned char *gftbl);
 
 /**
  * @brief GF(2^8) vector multiply by constant, runs baseline version.
@@ -143,8 +143,8 @@ void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
  * @returns 0 pass, other fail
  */
 
-int gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
-                      unsigned char *dest);
+int
+gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
 
 #ifdef __cplusplus
 }
diff --git a/include/igzip_lib.h b/include/igzip_lib.h
index bc60aa0..ee9b72a 100644
--- a/include/igzip_lib.h
+++ b/include/igzip_lib.h
@@ -80,17 +80,17 @@ extern "C" {
 /******************************************************************************/
 /* Deflate Compression Standard Defines */
 /******************************************************************************/
-#define IGZIP_K  1024
-#define ISAL_DEF_MAX_HDR_SIZE 328
-#define ISAL_DEF_MAX_CODE_LEN 15
-#define ISAL_DEF_HIST_SIZE (32*IGZIP_K)
+#define IGZIP_K                1024
+#define ISAL_DEF_MAX_HDR_SIZE  328
+#define ISAL_DEF_MAX_CODE_LEN  15
+#define ISAL_DEF_HIST_SIZE     (32 * IGZIP_K)
 #define ISAL_DEF_MAX_HIST_BITS 15
-#define ISAL_DEF_MAX_MATCH 258
-#define ISAL_DEF_MIN_MATCH 3
+#define ISAL_DEF_MAX_MATCH     258
+#define ISAL_DEF_MIN_MATCH     3
 
-#define ISAL_DEF_LIT_SYMBOLS 257
-#define ISAL_DEF_LEN_SYMBOLS 29
-#define ISAL_DEF_DIST_SYMBOLS 30
+#define ISAL_DEF_LIT_SYMBOLS     257
+#define ISAL_DEF_LEN_SYMBOLS     29
+#define ISAL_DEF_DIST_SYMBOLS    30
 #define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS)
 
 /* Max repeat length, rounded up to 32 byte boundary */
@@ -118,53 +118,53 @@ extern "C" {
 
 #define ISAL_LIMIT_HASH_UPDATE
 
-#define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K)
-#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE
+#define IGZIP_HASH8K_HASH_SIZE   (8 * IGZIP_K)
+#define IGZIP_HASH_HIST_SIZE     IGZIP_HIST_SIZE
 #define IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE
 
-#define IGZIP_LVL0_HASH_SIZE  (8 * IGZIP_K)
-#define IGZIP_LVL1_HASH_SIZE  IGZIP_HASH8K_HASH_SIZE
-#define IGZIP_LVL2_HASH_SIZE  IGZIP_HASH_HIST_SIZE
-#define IGZIP_LVL3_HASH_SIZE  IGZIP_HASH_MAP_HASH_SIZE
+#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K)
+#define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE
+#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE
+#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE
 
 #ifdef LONGER_HUFFTABLE
-enum {IGZIP_DIST_TABLE_SIZE = 8*1024};
+enum { IGZIP_DIST_TABLE_SIZE = 8 * 1024 };
 
 /* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
 enum { IGZIP_DECODE_OFFSET = 26 };
 #else
-enum {IGZIP_DIST_TABLE_SIZE = 2};
+enum { IGZIP_DIST_TABLE_SIZE = 2 };
 /* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
 enum { IGZIP_DECODE_OFFSET = 0 };
 #endif
-enum {IGZIP_LEN_TABLE_SIZE = 256};
-enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
+enum { IGZIP_LEN_TABLE_SIZE = 256 };
+enum { IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS };
 
-#define IGZIP_HUFFTABLE_CUSTOM 0
+#define IGZIP_HUFFTABLE_CUSTOM  0
 #define IGZIP_HUFFTABLE_DEFAULT 1
-#define IGZIP_HUFFTABLE_STATIC 2
+#define IGZIP_HUFFTABLE_STATIC  2
 
 /* Flush Flags */
-#define NO_FLUSH	0	/* Default */
-#define SYNC_FLUSH	1
-#define FULL_FLUSH	2
-#define FINISH_FLUSH	0	/* Deprecated */
+#define NO_FLUSH     0 /* Default */
+#define SYNC_FLUSH   1
+#define FULL_FLUSH   2
+#define FINISH_FLUSH 0 /* Deprecated */
 
 /* Gzip Flags */
-#define IGZIP_DEFLATE	0	/* Default */
-#define IGZIP_GZIP	1
-#define IGZIP_GZIP_NO_HDR	2
-#define IGZIP_ZLIB	3
-#define IGZIP_ZLIB_NO_HDR	4
+#define IGZIP_DEFLATE     0 /* Default */
+#define IGZIP_GZIP        1
+#define IGZIP_GZIP_NO_HDR 2
+#define IGZIP_ZLIB        3
+#define IGZIP_ZLIB_NO_HDR 4
 
 /* Compression Return values */
-#define COMP_OK 0
-#define INVALID_FLUSH -7
-#define INVALID_PARAM -8
-#define STATELESS_OVERFLOW -1
+#define COMP_OK                0
+#define INVALID_FLUSH          -7
+#define INVALID_PARAM          -8
+#define STATELESS_OVERFLOW     -1
 #define ISAL_INVALID_OPERATION -9
-#define ISAL_INVALID_STATE -3
-#define ISAL_INVALID_LEVEL -4	/* Invalid Compression level set */
+#define ISAL_INVALID_STATE     -3
+#define ISAL_INVALID_LEVEL     -4 /* Invalid Compression level set */
 #define ISAL_INVALID_LEVEL_BUF -5 /* Invalid buffer specified for the compression level */
 
 /**
@@ -172,35 +172,34 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
  *  @brief Compression State please note ZSTATE_TRL only applies for GZIP compression
  */
 
-
 /* When the state is set to ZSTATE_NEW_HDR or TMP_ZSTATE_NEW_HEADER, the
  * hufftable being used for compression may be swapped
  */
 enum isal_zstate_state {
-	ZSTATE_NEW_HDR,  //!< Header to be written
-	ZSTATE_HDR,	//!< Header state
-	ZSTATE_CREATE_HDR, //!< Header to be created
-	ZSTATE_BODY,	//!< Body state
-	ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
-	ZSTATE_FLUSH_ICF_BUFFER,
-	ZSTATE_TYPE0_HDR, //! Type0 block header to be written
-	ZSTATE_TYPE0_BODY, //!< Type0 block body to be written
-	ZSTATE_SYNC_FLUSH, //!< Write sync flush block
-	ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
-	ZSTATE_TRL,	//!< Trailer state
-	ZSTATE_END,	//!< End state
-	ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written
-	ZSTATE_TMP_HDR,	//!< Temporary Header state
-	ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state
-	ZSTATE_TMP_BODY,	//!< Temporary Body state
-	ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
-	ZSTATE_TMP_FLUSH_ICF_BUFFER,
-	ZSTATE_TMP_TYPE0_HDR, //! Temporary Type0 block header to be written
-	ZSTATE_TMP_TYPE0_BODY, //!< Temporary Type0 block body to be written
-	ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block
-	ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
-	ZSTATE_TMP_TRL,	//!< Temporary Trailer state
-	ZSTATE_TMP_END	//!< Temporary End state
+        ZSTATE_NEW_HDR,           //!< Header to be written
+        ZSTATE_HDR,               //!< Header state
+        ZSTATE_CREATE_HDR,        //!< Header to be created
+        ZSTATE_BODY,              //!< Body state
+        ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
+        ZSTATE_FLUSH_ICF_BUFFER,
+        ZSTATE_TYPE0_HDR,             //!< Type0 block header to be written
+        ZSTATE_TYPE0_BODY,            //!< Type0 block body to be written
+        ZSTATE_SYNC_FLUSH,            //!< Write sync flush block
+        ZSTATE_FLUSH_WRITE_BUFFER,    //!< Flush bitbuf
+        ZSTATE_TRL,                   //!< Trailer state
+        ZSTATE_END,                   //!< End state
+        ZSTATE_TMP_NEW_HDR,           //!< Temporary Header to be written
+        ZSTATE_TMP_HDR,               //!< Temporary Header state
+        ZSTATE_TMP_CREATE_HDR,        //!< Temporary Header to be created state
+        ZSTATE_TMP_BODY,              //!< Temporary Body state
+        ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
+        ZSTATE_TMP_FLUSH_ICF_BUFFER,
+        ZSTATE_TMP_TYPE0_HDR,          //!< Temporary Type0 block header to be written
+        ZSTATE_TMP_TYPE0_BODY,         //!< Temporary Type0 block body to be written
+        ZSTATE_TMP_SYNC_FLUSH,         //!< Write sync flush block
+        ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
+        ZSTATE_TMP_TRL,                //!< Temporary Trailer state
+        ZSTATE_TMP_END                 //!< Temporary End state
 };
 
 /* Offset used to switch between TMP states and non-tmp states */
@@ -209,49 +208,49 @@ enum isal_zstate_state {
 /******************************************************************************/
 /* Inflate Implementation Specific Defines */
 /******************************************************************************/
-#define ISAL_DECODE_LONG_BITS 12
+#define ISAL_DECODE_LONG_BITS  12
 #define ISAL_DECODE_SHORT_BITS 10
 
 /* Current state of decompression */
 enum isal_block_state {
-	ISAL_BLOCK_NEW_HDR,	/* Just starting a new block */
-	ISAL_BLOCK_HDR,		/* In the middle of reading in a block header */
-	ISAL_BLOCK_TYPE0,	/* Decoding a type 0 block */
-	ISAL_BLOCK_CODED,	/* Decoding a huffman coded block */
-	ISAL_BLOCK_INPUT_DONE,	/* Decompression of input is completed */
-	ISAL_BLOCK_FINISH,	/* Decompression of input is completed and all data has been flushed to output */
-	ISAL_GZIP_EXTRA_LEN,
-	ISAL_GZIP_EXTRA,
-	ISAL_GZIP_NAME,
-	ISAL_GZIP_COMMENT,
-	ISAL_GZIP_HCRC,
-	ISAL_ZLIB_DICT,
-	ISAL_CHECKSUM_CHECK,
+        ISAL_BLOCK_NEW_HDR,    /* Just starting a new block */
+        ISAL_BLOCK_HDR,        /* In the middle of reading in a block header */
+        ISAL_BLOCK_TYPE0,      /* Decoding a type 0 block */
+        ISAL_BLOCK_CODED,      /* Decoding a huffman coded block */
+        ISAL_BLOCK_INPUT_DONE, /* Decompression of input is completed */
+        ISAL_BLOCK_FINISH, /* Decompression of input is completed and all data has been flushed to
+                              output */
+        ISAL_GZIP_EXTRA_LEN,
+        ISAL_GZIP_EXTRA,
+        ISAL_GZIP_NAME,
+        ISAL_GZIP_COMMENT,
+        ISAL_GZIP_HCRC,
+        ISAL_ZLIB_DICT,
+        ISAL_CHECKSUM_CHECK,
 };
 
-
 /* Inflate Flags */
-#define ISAL_DEFLATE	0	/* Default */
-#define ISAL_GZIP	1
-#define ISAL_GZIP_NO_HDR	2
-#define ISAL_ZLIB	3
-#define ISAL_ZLIB_NO_HDR	4
-#define ISAL_ZLIB_NO_HDR_VER	5
-#define ISAL_GZIP_NO_HDR_VER	6
+#define ISAL_DEFLATE         0 /* Default */
+#define ISAL_GZIP            1
+#define ISAL_GZIP_NO_HDR     2
+#define ISAL_ZLIB            3
+#define ISAL_ZLIB_NO_HDR     4
+#define ISAL_ZLIB_NO_HDR_VER 5
+#define ISAL_GZIP_NO_HDR_VER 6
 
 /* Inflate Return values */
-#define ISAL_DECOMP_OK 0	/* No errors encountered while decompressing */
-#define ISAL_END_INPUT 1	/* End of input reached */
-#define ISAL_OUT_OVERFLOW 2	/* End of output reached */
-#define ISAL_NAME_OVERFLOW 3	/* End of gzip name buffer reached */
-#define ISAL_COMMENT_OVERFLOW 4	/* End of gzip name buffer reached */
-#define ISAL_EXTRA_OVERFLOW 5	/* End of extra buffer reached */
-#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
-#define ISAL_INVALID_BLOCK -1	/* Invalid deflate block found */
-#define ISAL_INVALID_SYMBOL -2	/* Invalid deflate symbol found */
-#define ISAL_INVALID_LOOKBACK -3	/* Invalid lookback distance found */
-#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
-#define ISAL_UNSUPPORTED_METHOD -5	/* Gzip/zlib wrapper specifies unsupported compress method */
+#define ISAL_DECOMP_OK          0  /* No errors encountered while decompressing */
+#define ISAL_END_INPUT          1  /* End of input reached */
+#define ISAL_OUT_OVERFLOW       2  /* End of output reached */
+#define ISAL_NAME_OVERFLOW      3  /* End of gzip name buffer reached */
+#define ISAL_COMMENT_OVERFLOW   4  /* End of gzip comment buffer reached */
+#define ISAL_EXTRA_OVERFLOW     5  /* End of extra buffer reached */
+#define ISAL_NEED_DICT          6  /* Stream needs a dictionary to continue */
+#define ISAL_INVALID_BLOCK      -1 /* Invalid deflate block found */
+#define ISAL_INVALID_SYMBOL     -2 /* Invalid deflate symbol found */
+#define ISAL_INVALID_LOOKBACK   -3 /* Invalid lookback distance found */
+#define ISAL_INVALID_WRAPPER    -4 /* Invalid gzip/zlib wrapper found */
+#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */
 #define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */
 
 /******************************************************************************/
@@ -259,15 +258,16 @@ enum isal_block_state {
 /******************************************************************************/
 /** @brief Holds histogram of deflate symbols*/
 struct isal_huff_histogram {
-	uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen
-	uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen
-	uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table
+        uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols
+                                                              //!< seen
+        uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen
+        uint16_t hash_table[IGZIP_LVL0_HASH_SIZE];      //!< Tmp space used as a hash table
 };
 
 /** @brief Holds modified histogram */
 struct isal_mod_hist {
-    uint32_t d_hist[30]; //!< Distance
-    uint32_t ll_hist[513]; //! Literal/length
+        uint32_t d_hist[30];   //!< Distance
+        uint32_t ll_hist[513]; //!< Literal/length
 };
 
 #define ISAL_DEF_MIN_LEVEL 0
@@ -275,80 +275,80 @@ struct isal_mod_hist {
 
 /* Defines used set level data sizes */
 /* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf */
-#define ISAL_DEF_LVL0_REQ 0
-#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE)
+#define ISAL_DEF_LVL0_REQ        0
+#define ISAL_DEF_LVL1_REQ        (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE)
 #define ISAL_DEF_LVL1_TOKEN_SIZE 4
-#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE)
+#define ISAL_DEF_LVL2_REQ        (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE)
 #define ISAL_DEF_LVL2_TOKEN_SIZE 4
-#define ISAL_DEF_LVL3_REQ 4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE
+#define ISAL_DEF_LVL3_REQ        (4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE)
 #define ISAL_DEF_LVL3_TOKEN_SIZE 4
 
 /* Data sizes for level specific data options */
-#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ
-#define ISAL_DEF_LVL0_SMALL ISAL_DEF_LVL0_REQ
-#define ISAL_DEF_LVL0_MEDIUM ISAL_DEF_LVL0_REQ
-#define ISAL_DEF_LVL0_LARGE ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_MIN         ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_SMALL       ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_MEDIUM      ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_LARGE       ISAL_DEF_LVL0_REQ
 #define ISAL_DEF_LVL0_EXTRA_LARGE ISAL_DEF_LVL0_REQ
-#define ISAL_DEF_LVL0_DEFAULT ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_DEFAULT     ISAL_DEF_LVL0_REQ
 
-#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K)
-#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K)
-#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K)
-#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL1_MIN         (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL1_SMALL       (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL1_MEDIUM      (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL1_LARGE       (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K)
 #define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
-#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE
+#define ISAL_DEF_LVL1_DEFAULT     ISAL_DEF_LVL1_LARGE
 
-#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K)
-#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K)
-#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K)
-#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL2_MIN         (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL2_SMALL       (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL2_MEDIUM      (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL2_LARGE       (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K)
 #define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K)
-#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE
+#define ISAL_DEF_LVL2_DEFAULT     ISAL_DEF_LVL2_LARGE
 
-#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K)
-#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K)
-#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K)
-#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL3_MIN         (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL3_SMALL       (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL3_MEDIUM      (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL3_LARGE       (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K)
 #define ISAL_DEF_LVL3_EXTRA_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 128 * IGZIP_K)
-#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE
+#define ISAL_DEF_LVL3_DEFAULT     ISAL_DEF_LVL3_LARGE
 
-#define IGZIP_NO_HIST 0
-#define IGZIP_HIST 1
-#define IGZIP_DICT_HIST 2
+#define IGZIP_NO_HIST       0
+#define IGZIP_HIST          1
+#define IGZIP_DICT_HIST     2
 #define IGZIP_DICT_HASH_SET 3
 
 /** @brief Holds Bit Buffer information*/
 struct BitBuf2 {
-	uint64_t m_bits;	//!< bits in the bit buffer
-	uint32_t m_bit_count;	//!< number of valid bits in the bit buffer
-	uint8_t *m_out_buf;	//!< current index of buffer to write to
-	uint8_t *m_out_end;	//!< end of buffer to write to
-	uint8_t *m_out_start;	//!< start of buffer to write to
+        uint64_t m_bits;      //!< bits in the bit buffer
+        uint32_t m_bit_count; //!< number of valid bits in the bit buffer
+        uint8_t *m_out_buf;   //!< current index of buffer to write to
+        uint8_t *m_out_end;   //!< end of buffer to write to
+        uint8_t *m_out_start; //!< start of buffer to write to
 };
 
 /** @brief Holds Zlib header information */
 struct isal_zlib_header {
-	uint32_t info;		//!< base-2 logarithm of the LZ77 window size minus 8
-	uint32_t level;		//!< Compression level (fastest, fast, default, maximum)
-	uint32_t dict_id;	//!< Dictionary id
-	uint32_t dict_flag;	//!< Whether to use a dictionary
+        uint32_t info;      //!< base-2 logarithm of the LZ77 window size minus 8
+        uint32_t level;     //!< Compression level (fastest, fast, default, maximum)
+        uint32_t dict_id;   //!< Dictionary id
+        uint32_t dict_flag; //!< Whether to use a dictionary
 };
 
 /** @brief Holds Gzip header information */
 struct isal_gzip_header {
-	uint32_t text;		//!< Optional Text hint
-	uint32_t time;		//!< Unix modification time in gzip header
-	uint32_t xflags;	//!< xflags in gzip header
-	uint32_t os;		//!< OS in gzip header
-	uint8_t *extra;		//!< Extra field in gzip header
-	uint32_t extra_buf_len;	//!< Length of extra buffer
-	uint32_t extra_len;	//!< Actual length of gzip header extra field
-	char *name;		//!< Name in gzip header
-	uint32_t name_buf_len;	//!< Length of name buffer
-	char *comment;		//!< Comments in gzip header
-	uint32_t comment_buf_len;	//!< Length of comment buffer
-	uint32_t hcrc;		//!< Header crc or header crc flag
-	uint32_t flags;		//!< Internal data
+        uint32_t text;            //!< Optional Text hint
+        uint32_t time;            //!< Unix modification time in gzip header
+        uint32_t xflags;          //!< xflags in gzip header
+        uint32_t os;              //!< OS in gzip header
+        uint8_t *extra;           //!< Extra field in gzip header
+        uint32_t extra_buf_len;   //!< Length of extra buffer
+        uint32_t extra_len;       //!< Actual length of gzip header extra field
+        char *name;               //!< Name in gzip header
+        uint32_t name_buf_len;    //!< Length of name buffer
+        char *comment;            //!< Comments in gzip header
+        uint32_t comment_buf_len; //!< Length of comment buffer
+        uint32_t hcrc;            //!< Header crc or header crc flag
+        uint32_t flags;           //!< Internal data
 };
 
 /* Variable prefixes:
@@ -358,65 +358,67 @@ struct isal_gzip_header {
 
 /** @brief Holds the internal state information for input and output compression streams*/
 struct isal_zstate {
-	uint32_t total_in_start; //!< Not used, may be replaced with something else
-	uint32_t block_next;	//!< Start of current deflate block in the input
-	uint32_t block_end;	//!< End of current deflate block in the input
-	uint32_t dist_mask;	//!< Distance mask used.
-	uint32_t hash_mask;
-	enum isal_zstate_state state;	//!< Current state in processing the data stream
-	struct BitBuf2 bitbuf;	//!< Bit Buffer
-	uint32_t crc;		//!< Current checksum without finalize step if any (adler)
-	uint8_t has_wrap_hdr;	//!< keeps track of wrapper header
-	uint8_t has_eob_hdr;	//!< keeps track of eob hdr (with BFINAL set)
-	uint8_t has_eob;	//!< keeps track of eob on the last deflate block
-	uint8_t has_hist;	//!< flag to track if there is match history
-	uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized.
-	uint32_t count;	//!< used for partial header/trailer writes
-	uint8_t tmp_out_buff[16];	//!< temporary array
-	uint32_t tmp_out_start;	//!< temporary variable
-	uint32_t tmp_out_end;	//!< temporary variable
-	uint32_t b_bytes_valid;	//!< number of valid bytes in buffer
-	uint32_t b_bytes_processed;	//!< number of bytes processed in buffer
-	uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD];	//!< Internal buffer
+        uint32_t total_in_start; //!< Not used, may be replaced with something else
+        uint32_t block_next;     //!< Start of current deflate block in the input
+        uint32_t block_end;      //!< End of current deflate block in the input
+        uint32_t dist_mask;      //!< Distance mask used.
+        uint32_t hash_mask;
+        enum isal_zstate_state state; //!< Current state in processing the data stream
+        struct BitBuf2 bitbuf;        //!< Bit Buffer
+        uint32_t crc;                 //!< Current checksum without finalize step if any (adler)
+        uint8_t has_wrap_hdr;         //!< keeps track of wrapper header
+        uint8_t has_eob_hdr;          //!< keeps track of eob hdr (with BFINAL set)
+        uint8_t has_eob;              //!< keeps track of eob on the last deflate block
+        uint8_t has_hist;             //!< flag to track if there is match history
+        uint16_t
+                has_level_buf_init; //!< flag to track if user supplied memory has been initialized.
+        uint32_t count;             //!< used for partial header/trailer writes
+        uint8_t tmp_out_buff[16];   //!< temporary array
+        uint32_t tmp_out_start;     //!< temporary variable
+        uint32_t tmp_out_end;       //!< temporary variable
+        uint32_t b_bytes_valid;     //!< number of valid bytes in buffer
+        uint32_t b_bytes_processed; //!< number of bytes processed in buffer
+        uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer
 
-	/* Stream should be setup such that the head is cache aligned*/
-	uint16_t head[IGZIP_LVL0_HASH_SIZE];	//!< Hash array
+        /* Stream should be setup such that the head is cache aligned*/
+        uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array
 };
 
 /** @brief Holds the huffman tree used to huffman encode the input stream **/
 struct isal_hufftables {
 
-	uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; //!< deflate huffman tree header
-	uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_huff_hdr
-	uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header
-	uint32_t dist_table[IGZIP_DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
-	uint32_t len_table[IGZIP_LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
-	uint16_t lit_table[IGZIP_LIT_TABLE_SIZE]; //!< literal code
-	uint8_t lit_table_sizes[IGZIP_LIT_TABLE_SIZE]; //!< literal code length
-	uint16_t dcodes[30 - IGZIP_DECODE_OFFSET]; //!< distance code
-	uint8_t dcodes_sizes[30 - IGZIP_DECODE_OFFSET]; //!< distance code length
-
+        uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; //!< deflate huffman tree header
+        uint32_t deflate_hdr_count;                 //!< Number of whole bytes in deflate_hdr
+        uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header
+        uint32_t dist_table[IGZIP_DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are
+                                                    //!< the code
+        uint32_t len_table[IGZIP_LEN_TABLE_SIZE];   //!< bits 4:0 are the code length, bits 31:5 are
+                                                    //!< the code
+        uint16_t lit_table[IGZIP_LIT_TABLE_SIZE];   //!< literal code
+        uint8_t lit_table_sizes[IGZIP_LIT_TABLE_SIZE];  //!< literal code length
+        uint16_t dcodes[30 - IGZIP_DECODE_OFFSET];      //!< distance code
+        uint8_t dcodes_sizes[30 - IGZIP_DECODE_OFFSET]; //!< distance code length
 };
 
 /** @brief Holds stream information*/
 struct isal_zstream {
-	uint8_t *next_in;	//!< Next input byte
-	uint32_t avail_in;	//!< number of bytes available at next_in
-	uint32_t total_in;	//!< total number of bytes read so far
+        uint8_t *next_in;  //!< Next input byte
+        uint32_t avail_in; //!< number of bytes available at next_in
+        uint32_t total_in; //!< total number of bytes read so far
 
-	uint8_t *next_out;	//!< Next output byte
-	uint32_t avail_out;	//!< number of bytes available at next_out
-	uint32_t total_out;	//!< total number of bytes written so far
+        uint8_t *next_out;  //!< Next output byte
+        uint32_t avail_out; //!< number of bytes available at next_out
+        uint32_t total_out; //!< total number of bytes written so far
 
-	struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
-	uint32_t level; //!< Compression level to use
-	uint32_t level_buf_size; //!< Size of level_buf
-	uint8_t * level_buf; //!< User allocated buffer required for different compression levels
-	uint16_t end_of_stream;	//!< non-zero if this is the last input buffer
-	uint16_t flush;	//!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH
-	uint16_t gzip_flag; //!< Indicate if gzip compression is to be performed
-	uint16_t hist_bits; //!< Log base 2 of maximum lookback distance, 0 is use default
-	struct isal_zstate internal_state;	//!< Internal state for this stream
+        struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
+        uint32_t level;                     //!< Compression level to use
+        uint32_t level_buf_size;            //!< Size of level_buf
+        uint8_t *level_buf;     //!< User allocated buffer required for different compression levels
+        uint16_t end_of_stream; //!< non-zero if this is the last input buffer
+        uint16_t flush;         //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH
+        uint16_t gzip_flag;     //!< Indicate if gzip compression is to be performed
+        uint16_t hist_bits;     //!< Log base 2 of maximum lookback distance, 0 is use default
+        struct isal_zstate internal_state; //!< Internal state for this stream
 };
 
 /******************************************************************************/
@@ -482,8 +484,10 @@ struct isal_zstream {
 #define ISAL_L_DUP ((1 << ISAL_L_REM) - (ISAL_L_REM + 1))
 #define ISAL_S_DUP ((1 << ISAL_S_REM) - (ISAL_S_REM + 1))
 
-#define ISAL_L_UNUSED ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM)/2)) - (1 << ((ISAL_L_REM + 1)/2)) + 1)
-#define ISAL_S_UNUSED ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM)/2)) - (1 << ((ISAL_S_REM + 1)/2)) + 1)
+#define ISAL_L_UNUSED                                                                              \
+        ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM) / 2)) - (1 << ((ISAL_L_REM + 1) / 2)) + 1)
+#define ISAL_S_UNUSED                                                                              \
+        ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM) / 2)) - (1 << ((ISAL_S_REM + 1) / 2)) + 1)
 
 #define ISAL_L_SIZE (ISAL_DEF_LIT_LEN_SYMBOLS + ISAL_L_DUP + ISAL_L_UNUSED)
 #define ISAL_S_SIZE (ISAL_DEF_DIST_SYMBOLS + ISAL_S_DUP + ISAL_S_UNUSED)
@@ -493,48 +497,52 @@ struct isal_zstream {
 
 /** @brief Large lookup table for decoding huffman codes */
 struct inflate_huff_code_large {
-	uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)];       //!< Short code lookup table
-	uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED];   //!< Long code lookup table
+        uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)];     //!< Short code lookup table
+        uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED]; //!< Long code lookup table
 };
 
 /** @brief Small lookup table for decoding huffman codes */
 struct inflate_huff_code_small {
-	uint16_t short_code_lookup[1 << (ISAL_DECODE_SHORT_BITS)];      //!<Short code lookup table
-	uint16_t long_code_lookup[ISAL_HUFF_CODE_SMALL_LONG_ALIGNED];   //!< Long code lookup table
+        uint16_t short_code_lookup[1 << (ISAL_DECODE_SHORT_BITS)];    //!< Short code lookup table
+        uint16_t long_code_lookup[ISAL_HUFF_CODE_SMALL_LONG_ALIGNED]; //!< Long code lookup table
 };
 
 /** @brief Holds decompression state information*/
 struct inflate_state {
-	uint8_t *next_out;	//!< Next output Byte
-	uint32_t avail_out;	//!< Number of bytes available at next_out
-	uint32_t total_out;	//!< Total bytes written out so far
-	uint8_t *next_in;	//!< Next input byte
-	uint64_t read_in;	//!< Bits buffered to handle unaligned streams
-	uint32_t avail_in;	//!< Number of bytes available at next_in
-	int32_t read_in_length;	//!< Bits in read_in
-	struct inflate_huff_code_large lit_huff_code;	//!< Structure for decoding lit/len symbols
-	struct inflate_huff_code_small dist_huff_code;	//!< Structure for decoding dist symbols
-	enum isal_block_state block_state;	//!< Current decompression state
-	uint32_t dict_length;	//!< Length of dictionary used
-	uint32_t bfinal;	//!< Flag identifying final block
-	uint32_t crc_flag;	//!< Flag identifying whether to track of crc
-	uint32_t crc;		//!< Contains crc or adler32 of output if crc_flag is set
-	uint32_t hist_bits; //!< Log base 2 of maximum lookback distance
-	union {
-		int32_t type0_block_len;	//!< Length left to read of type 0 block when outbuffer overflow occurred
-		int32_t count; //!< Count of bytes remaining to be parsed
-		uint32_t dict_id;
-	};
-	int32_t write_overflow_lits;
-	int32_t write_overflow_len;
-	int32_t copy_overflow_length; 	//!< Length left to copy when outbuffer overflow occurred
-	int32_t copy_overflow_distance;	//!< Lookback distance when outbuffer overflow occurred
-	int16_t wrapper_flag;
-	int16_t tmp_in_size;	//!< Number of bytes in tmp_in_buffer
-	int32_t tmp_out_valid;	//!< Number of bytes in tmp_out_buffer
-	int32_t tmp_out_processed;	//!< Number of bytes processed in tmp_out_buffer
-	uint8_t tmp_in_buffer[ISAL_DEF_MAX_HDR_SIZE];	//!< Temporary buffer containing data from the input stream
-	uint8_t tmp_out_buffer[2 * ISAL_DEF_HIST_SIZE + ISAL_LOOK_AHEAD]; 	//!< Temporary buffer containing data from the output stream
+        uint8_t *next_out;                            //!< Next output Byte
+        uint32_t avail_out;                           //!< Number of bytes available at next_out
+        uint32_t total_out;                           //!< Total bytes written out so far
+        uint8_t *next_in;                             //!< Next input byte
+        uint64_t read_in;                             //!< Bits buffered to handle unaligned streams
+        uint32_t avail_in;                            //!< Number of bytes available at next_in
+        int32_t read_in_length;                       //!< Bits in read_in
+        struct inflate_huff_code_large lit_huff_code; //!< Structure for decoding lit/len symbols
+        struct inflate_huff_code_small dist_huff_code; //!< Structure for decoding dist symbols
+        enum isal_block_state block_state;             //!< Current decompression state
+        uint32_t dict_length;                          //!< Length of dictionary used
+        uint32_t bfinal;                               //!< Flag identifying final block
+        uint32_t crc_flag;                             //!< Flag identifying whether to track crc
+        uint32_t crc;       //!< Contains crc or adler32 of output if crc_flag is set
+        uint32_t hist_bits; //!< Log base 2 of maximum lookback distance
+        union {
+                int32_t type0_block_len; //!< Length left to read of type 0 block when outbuffer
+                                         //!< overflow occurred
+                int32_t count;           //!< Count of bytes remaining to be parsed
+                uint32_t dict_id;
+        };
+        int32_t write_overflow_lits;
+        int32_t write_overflow_len;
+        int32_t copy_overflow_length;   //!< Length left to copy when outbuffer overflow occurred
+        int32_t copy_overflow_distance; //!< Lookback distance when outbuffer overflow occurred
+        int16_t wrapper_flag;
+        int16_t tmp_in_size;       //!< Number of bytes in tmp_in_buffer
+        int32_t tmp_out_valid;     //!< Number of bytes in tmp_out_buffer
+        int32_t tmp_out_processed; //!< Number of bytes processed in tmp_out_buffer
+        uint8_t tmp_in_buffer[ISAL_DEF_MAX_HDR_SIZE]; //!< Temporary buffer containing data from the
+                                                      //!< input stream
+        uint8_t tmp_out_buffer[2 * ISAL_DEF_HIST_SIZE +
+                               ISAL_LOOK_AHEAD]; //!< Temporary buffer containing data from the
+                                                 //!< output stream
 };
 
 /******************************************************************************/
@@ -551,8 +559,8 @@ struct inflate_state {
  * @param length: The length of start_stream.
  * @param histogram: The returned histogram of lit/len/dist symbols.
  */
-void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_histogram * histogram);
-
+void
+isal_update_histogram(uint8_t *in_stream, int length, struct isal_huff_histogram *histogram);
 
 /**
  * @brief Creates a custom huffman code for the given histograms in which
@@ -564,8 +572,8 @@ void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_his
  *        repeat lengths and lookback distances
  * @returns Returns a non zero value if an invalid huffman code was created.
  */
-int isal_create_hufftables(struct isal_hufftables * hufftables,
-			struct isal_huff_histogram * histogram);
+int
+isal_create_hufftables(struct isal_hufftables *hufftables, struct isal_huff_histogram *histogram);
 
 /**
  * @brief Creates a custom huffman code for the given histograms like
@@ -577,8 +585,9 @@ int isal_create_hufftables(struct isal_hufftables * hufftables,
  *        repeat lengths and lookback distances
  * @returns Returns a non zero value if an invalid huffman code was created.
  */
-int isal_create_hufftables_subset(struct isal_hufftables * hufftables,
-				struct isal_huff_histogram * histogram);
+int
+isal_create_hufftables_subset(struct isal_hufftables *hufftables,
+                              struct isal_huff_histogram *histogram);
 
 /**
  * @brief Initialize compression stream data structure
@@ -586,7 +595,8 @@ int isal_create_hufftables_subset(struct isal_hufftables * hufftables,
  * @param stream Structure holding state information on the compression streams.
  * @returns none
  */
-void isal_deflate_init(struct isal_zstream *stream);
+void
+isal_deflate_init(struct isal_zstream *stream);
 
 /**
  * @brief Reinitialize compression stream data structure. Performs the same
@@ -597,22 +607,24 @@ void isal_deflate_init(struct isal_zstream *stream);
  * @param stream Structure holding state information on the compression streams.
  * @returns none
  */
-void isal_deflate_reset(struct isal_zstream *stream);
-
+void
+isal_deflate_reset(struct isal_zstream *stream);
 
 /**
  * @brief Set gzip header default values
  *
  * @param gz_hdr: Gzip header to initialize.
  */
-void isal_gzip_header_init(struct isal_gzip_header *gz_hdr);
+void
+isal_gzip_header_init(struct isal_gzip_header *gz_hdr);
 
 /**
  * @brief Set zlib header default values
  *
  * @param z_hdr: zlib header to initialize.
  */
-void isal_zlib_header_init(struct isal_zlib_header *z_hdr);
+void
+isal_zlib_header_init(struct isal_zlib_header *z_hdr);
 
 /**
  * @brief Write gzip header to output stream
@@ -629,7 +641,8 @@ void isal_zlib_header_init(struct isal_zlib_header *z_hdr);
  * the minimum size required to successfully write the gzip header to the output
  * buffer.
  */
-uint32_t isal_write_gzip_header(struct isal_zstream * stream, struct isal_gzip_header *gz_hdr);
+uint32_t
+isal_write_gzip_header(struct isal_zstream *stream, struct isal_gzip_header *gz_hdr);
 
 /**
  * @brief Write zlib header to output stream
@@ -646,7 +659,8 @@ uint32_t isal_write_gzip_header(struct isal_zstream * stream, struct isal_gzip_h
  * the minimum size required to successfully write the zlib header to the output
  * buffer.
  */
-uint32_t isal_write_zlib_header(struct isal_zstream * stream, struct isal_zlib_header *z_hdr);
+uint32_t
+isal_write_zlib_header(struct isal_zstream *stream, struct isal_zlib_header *z_hdr);
 
 /**
  * @brief Set stream to use a new Huffman code
@@ -668,8 +682,9 @@ uint32_t isal_write_zlib_header(struct isal_zstream * stream, struct isal_zlib_h
  * due to the stream being in a state where changing the huffman code is not
  * allowed or an invalid input is provided.
  */
-int isal_deflate_set_hufftables(struct isal_zstream *stream,
-				struct isal_hufftables *hufftables, int type);
+int
+isal_deflate_set_hufftables(struct isal_zstream *stream, struct isal_hufftables *hufftables,
+                            int type);
 
 /**
  * @brief Initialize compression stream data structure
@@ -677,8 +692,8 @@ int isal_deflate_set_hufftables(struct isal_zstream *stream,
  * @param stream Structure holding state information on the compression streams.
  * @returns none
  */
-void isal_deflate_stateless_init(struct isal_zstream *stream);
-
+void
+isal_deflate_stateless_init(struct isal_zstream *stream);
 
 /**
  * @brief Set compression dictionary to use
@@ -694,17 +709,18 @@ void isal_deflate_stateless_init(struct isal_zstream *stream);
  * @returns COMP_OK,
  *          ISAL_INVALID_STATE (dictionary could not be set)
  */
-int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len);
+int
+isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len);
 
 /** @brief Structure for holding processed dictionary information */
 
 struct isal_dict {
-	uint32_t params;
-	uint32_t level;
-	uint32_t hist_size;
-	uint32_t hash_size;
-	uint8_t history[ISAL_DEF_HIST_SIZE];
-	uint16_t hashtable[IGZIP_LVL3_HASH_SIZE];
+        uint32_t params;
+        uint32_t level;
+        uint32_t hist_size;
+        uint32_t hash_size;
+        uint8_t history[ISAL_DEF_HIST_SIZE];
+        uint16_t hashtable[IGZIP_LVL3_HASH_SIZE];
 };
 
 /**
@@ -725,8 +741,9 @@ struct isal_dict {
  * @returns COMP_OK,
  *          ISAL_INVALID_STATE (dictionary could not be processed)
  */
-int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str,
-			uint8_t *dict, uint32_t dict_len);
+int
+isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str, uint8_t *dict,
+                          uint32_t dict_len);
 
 /**
  * @brief Reset compression dictionary to use
@@ -745,8 +762,8 @@ int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dic
  * @returns COMP_OK,
  *          ISAL_INVALID_STATE or other (dictionary could not be reset)
  */
-int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str);
-
+int
+isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str);
 
 /**
  * @brief Fast data (deflate) compression for storage applications.
@@ -802,8 +819,8 @@ int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_
  *         ISAL_INVALID_LEVEL (if an invalid compression level is selected),
  *         ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough).
  */
-int isal_deflate(struct isal_zstream *stream);
-
+int
+isal_deflate(struct isal_zstream *stream);
 
 /**
  * @brief Fast data (deflate) stateless compression for storage applications.
@@ -832,8 +849,8 @@ int isal_deflate(struct isal_zstream *stream);
  *         ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough),
  *         STATELESS_OVERFLOW (if output buffer will not fit output).
  */
-int isal_deflate_stateless(struct isal_zstream *stream);
-
+int
+isal_deflate_stateless(struct isal_zstream *stream);
 
 /******************************************************************************/
 /* Inflate functions */
@@ -844,7 +861,8 @@ int isal_deflate_stateless(struct isal_zstream *stream);
  * @param state Structure holding state information on the compression streams.
  * @returns none
  */
-void isal_inflate_init(struct inflate_state *state);
+void
+isal_inflate_init(struct inflate_state *state);
 
 /**
  * @brief Reinitialize decompression state data structure
@@ -852,7 +870,8 @@ void isal_inflate_init(struct inflate_state *state);
  * @param state Structure holding state information on the compression streams.
  * @returns none
  */
-void isal_inflate_reset(struct inflate_state *state);
+void
+isal_inflate_reset(struct inflate_state *state);
 
 /**
  * @brief Set decompression dictionary to use
@@ -867,7 +886,8 @@ void isal_inflate_reset(struct inflate_state *state);
  * @returns COMP_OK,
  *          ISAL_INVALID_STATE (dictionary could not be set)
  */
-int isal_inflate_set_dict(struct inflate_state *state, uint8_t *dict, uint32_t dict_len);
+int
+isal_inflate_set_dict(struct inflate_state *state, uint8_t *dict, uint32_t dict_len);
 
 /**
  * @brief Read and return gzip header information
@@ -890,7 +910,8 @@ int isal_inflate_set_dict(struct inflate_state *state, uint8_t *dict, uint32_t d
  *          ISAL_UNSUPPORTED_METHOD (deflate is not the compression method),
  *          ISAL_INCORRECT_CHECKSUM (gzip header checksum was incorrect)
  */
-int isal_read_gzip_header (struct inflate_state *state, struct isal_gzip_header *gz_hdr);
+int
+isal_read_gzip_header(struct inflate_state *state, struct isal_gzip_header *gz_hdr);
 
 /**
  * @brief Read and return zlib header information
@@ -905,7 +926,8 @@ int isal_read_gzip_header (struct inflate_state *state, struct isal_gzip_header
  *          ISAL_UNSUPPORTED_METHOD (deflate is not the compression method),
  *          ISAL_INCORRECT_CHECKSUM (zlib header checksum was incorrect)
  */
-int isal_read_zlib_header (struct inflate_state *state, struct isal_zlib_header *zlib_hdr);
+int
+isal_read_zlib_header(struct inflate_state *state, struct isal_zlib_header *zlib_hdr);
 
 /**
  * @brief Fast data (deflate) decompression for storage applications.
@@ -953,7 +975,8 @@ int isal_read_zlib_header (struct inflate_state *state, struct isal_zlib_header
  *         ISAL_INCORRECT_CHECKSUM.
  */
 
-int isal_inflate(struct inflate_state *state);
+int
+isal_inflate(struct inflate_state *state);
 
 /**
  * @brief Fast data (deflate) stateless decompression for storage applications.
@@ -975,7 +998,8 @@ int isal_inflate(struct inflate_state *state);
  *         ISAL_UNSUPPORTED_METHOD,
  *         ISAL_INCORRECT_CHECKSUM.
  */
-int isal_inflate_stateless(struct inflate_state *state);
+int
+isal_inflate_stateless(struct inflate_state *state);
 
 /******************************************************************************/
 /* Other functions */
@@ -992,9 +1016,10 @@ int isal_inflate_stateless(struct inflate_state *state);
  *
  * @returns 32-bit Adler-32 checksum
  */
-uint32_t isal_adler32(uint32_t init, const unsigned char *buf, uint64_t len);
+uint32_t
+isal_adler32(uint32_t init, const unsigned char *buf, uint64_t len);
 
 #ifdef __cplusplus
 }
 #endif
-#endif	/* ifndef _IGZIP_H */
+#endif /* ifndef _IGZIP_H */
diff --git a/include/mem_routines.h b/include/mem_routines.h
index 3d23522..61c25be 100644
--- a/include/mem_routines.h
+++ b/include/mem_routines.h
@@ -36,7 +36,6 @@
  *  Defines the interface for vector versions of common memory functions.
  */
 
-
 #ifndef _MEM_ROUTINES_H_
 #define _MEM_ROUTINES_H_
 
@@ -54,11 +53,11 @@ extern "C" {
  * @returns  0     - region is all zeros
  *           other - region has non zero bytes
  */
-int isal_zero_detect(void *mem, size_t len);
+int
+isal_zero_detect(void *mem, size_t len);
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif // _MEM_ROUTINES_H_
-
diff --git a/include/raid.h b/include/raid.h
index e826694..690b8fc 100644
--- a/include/raid.h
+++ b/include/raid.h
@@ -27,7 +27,6 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **********************************************************************/
 
-
 #ifndef _RAID_H_
 #define _RAID_H_
 
@@ -61,8 +60,8 @@ extern "C" {
  * @returns 0 pass, other fail
  */
 
-int xor_gen(int vects, int len, void **array);
-
+int
+xor_gen(int vects, int len, void **array);
 
 /**
  * @brief Checks that array has XOR parity sum of 0 across all vectors, runs appropriate version.
@@ -78,8 +77,8 @@ int xor_gen(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_check(int vects, int len, void **array);
-
+int
+xor_check(int vects, int len, void **array);
 
 /**
  * @brief Generate P+Q parity vectors from N sources, runs appropriate version.
@@ -98,11 +97,12 @@ int xor_check(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_gen(int vects, int len, void **array);
-
+int
+pq_gen(int vects, int len, void **array);
 
 /**
- * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs appropriate version.
+ * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs
+ * appropriate version.
  *
  * This function determines what instruction sets are enabled and
  * selects the appropriate version at runtime.
@@ -116,8 +116,8 @@ int pq_gen(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_check(int vects, int len, void **array);
-
+int
+pq_check(int vects, int len, void **array);
 
 /* Arch specific versions */
 // x86 only
@@ -136,8 +136,8 @@ int pq_check(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_gen_sse(int vects, int len, void **array);
-
+int
+xor_gen_sse(int vects, int len, void **array);
 
 /**
  * @brief Generate XOR parity vector from N sources.
@@ -152,8 +152,8 @@ int xor_gen_sse(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_gen_avx(int vects, int len, void **array);
-
+int
+xor_gen_avx(int vects, int len, void **array);
 
 /**
  * @brief Checks that array has XOR parity sum of 0 across all vectors.
@@ -167,8 +167,8 @@ int xor_gen_avx(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_check_sse(int vects, int len, void **array);
-
+int
+xor_check_sse(int vects, int len, void **array);
 
 /**
  * @brief Generate P+Q parity vectors from N sources.
@@ -185,8 +185,8 @@ int xor_check_sse(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_gen_sse(int vects, int len, void **array);
-
+int
+pq_gen_sse(int vects, int len, void **array);
 
 /**
  * @brief Generate P+Q parity vectors from N sources.
@@ -203,8 +203,8 @@ int pq_gen_sse(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_gen_avx(int vects, int len, void **array);
-
+int
+pq_gen_avx(int vects, int len, void **array);
 
 /**
  * @brief Generate P+Q parity vectors from N sources.
@@ -221,8 +221,8 @@ int pq_gen_avx(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_gen_avx2(int vects, int len, void **array);
-
+int
+pq_gen_avx2(int vects, int len, void **array);
 
 /**
  * @brief Checks that array of N sources, P and Q are consistent across all vectors.
@@ -236,7 +236,8 @@ int pq_gen_avx2(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_check_sse(int vects, int len, void **array);
+int
+pq_check_sse(int vects, int len, void **array);
 
 #endif
 
@@ -253,8 +254,8 @@ int pq_check_sse(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_gen_base(int vects, int len, void **array);
-
+int
+pq_gen_base(int vects, int len, void **array);
 
 /**
  * @brief Generate XOR parity vector from N sources, runs baseline version.
@@ -267,8 +268,8 @@ int pq_gen_base(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_gen_base(int vects, int len, void **array);
-
+int
+xor_gen_base(int vects, int len, void **array);
 
 /**
  * @brief Checks that array has XOR parity sum of 0 across all vectors, runs baseline version.
@@ -281,11 +282,12 @@ int xor_gen_base(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int xor_check_base(int vects, int len, void **array);
-
+int
+xor_check_base(int vects, int len, void **array);
 
 /**
- * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs baseline version.
+ * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs baseline
+ * version.
  *
  * @param vects  Number of vectors in array including P&Q. Must be > 3.
  * @param len    Length of each vector in bytes. Must be 16B aligned.
@@ -296,7 +298,8 @@ int xor_check_base(int vects, int len, void **array);
  * @returns 0 pass, other fail
  */
 
-int pq_check_base(int vects, int len, void **array);
+int
+pq_check_base(int vects, int len, void **array);
 
 #ifdef __cplusplus
 }
diff --git a/include/test.h b/include/test.h
index 3229c44..9f32638 100644
--- a/include/test.h
+++ b/include/test.h
@@ -47,35 +47,39 @@ extern "C" {
 #include <stdint.h>
 
 #ifdef _MSC_VER
-# define inline __inline
+#define inline __inline
 #endif
 
 /* Make os-independent alignment attribute, alloc and free. */
-#if defined  __unix__ || defined __APPLE__
-# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
-# define __forceinline static inline
-# define aligned_free(x) free(x)
+#if defined __unix__ || defined __APPLE__
+#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+#define __forceinline                   static inline
+#define aligned_free(x)                 free(x)
 #else
-# ifdef __MINGW32__
-#   define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
-#   define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
-#   define aligned_free(x) _aligned_free(x)
-# else
-#   define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
-#   define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
-#   define aligned_free(x) _aligned_free(x)
-# endif
+#ifdef __MINGW32__
+#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+#define posix_memalign(p, algn, len)                                                               \
+        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
+#define aligned_free(x) _aligned_free(x)
+#else
+#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
+#define posix_memalign(p, algn, len)                                                               \
+        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
+#define aligned_free(x) _aligned_free(x)
+#endif
 #endif
 
 #ifdef DEBUG
-# define DEBUG_PRINT(x) printf x
+#define DEBUG_PRINT(x) printf x
 #else
-# define DEBUG_PRINT(x) do {} while (0)
+#define DEBUG_PRINT(x)                                                                             \
+        do {                                                                                       \
+        } while (0)
 #endif
 
 /* Decide whether to use benchmark time as an approximation or a minimum. Fewer
  * calls to the timer are required for the approximation case.*/
-#define BENCHMARK_MIN_TIME 0
+#define BENCHMARK_MIN_TIME    0
 #define BENCHMARK_APPROX_TIME 1
 #ifndef BENCHMARK_TYPE
 #define BENCHMARK_TYPE BENCHMARK_MIN_TIME
@@ -86,228 +90,260 @@ extern "C" {
  * standardized clock source. To obtain a meaningful result it may be
  * necessary to fix the CPU clock to match the rtdsc tick rate.
  */
-# include <inttypes.h>
-# include <x86intrin.h>
-# define USE_CYCLES
+#include <inttypes.h>
+#include <x86intrin.h>
+#define USE_CYCLES
 #else
-# include <time.h>
+#include <time.h>
 #define USE_SECONDS
 #endif
 
 #ifdef USE_RDTSC
 #ifndef BENCHMARK_TIME
-# define BENCHMARK_TIME 6
+#define BENCHMARK_TIME 6
 #endif
-# define GHZ 1000000000
-# define UNIT_SCALE (GHZ)
-# define CALIBRATE_TIME (UNIT_SCALE / 2)
-static inline long long get_time(void) {
-	unsigned int dummy;
-	return __rdtscp(&dummy);
+#define GHZ            1000000000
+#define UNIT_SCALE     (GHZ)
+#define CALIBRATE_TIME (UNIT_SCALE / 2)
+static inline long long
+get_time(void)
+{
+        unsigned int dummy;
+        return __rdtscp(&dummy);
 }
 
-static inline long long get_res(void) {
-	return 1;
+static inline long long
+get_res(void)
+{
+        return 1;
 }
 #else
 #ifndef BENCHMARK_TIME
-# define BENCHMARK_TIME 3
+#define BENCHMARK_TIME 3
 #endif
 #ifdef _MSC_VER
-#define UNIT_SCALE get_res()
+#define UNIT_SCALE     get_res()
 #define CALIBRATE_TIME (UNIT_SCALE / 4)
-static inline long long get_time(void) {
-	long long ret = 0;
-	QueryPerformanceCounter(&ret);
-	return ret;
+static inline long long
+get_time(void)
+{
+        long long ret = 0;
+        QueryPerformanceCounter(&ret);
+        return ret;
 }
 
-static inline long long get_res(void) {
-	long long ret = 0;
-	QueryPerformanceFrequency(&ret);
-	return ret;
+static inline long long
+get_res(void)
+{
+        long long ret = 0;
+        QueryPerformanceFrequency(&ret);
+        return ret;
 }
 #else
-# define NANO_SCALE 1000000000
-# define UNIT_SCALE NANO_SCALE
-# define CALIBRATE_TIME (UNIT_SCALE / 4)
+#define NANO_SCALE     1000000000
+#define UNIT_SCALE     NANO_SCALE
+#define CALIBRATE_TIME (UNIT_SCALE / 4)
 #ifdef __FreeBSD__
-# define CLOCK_ID CLOCK_MONOTONIC_PRECISE
+#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
 #else
-# define CLOCK_ID CLOCK_MONOTONIC
+#define CLOCK_ID CLOCK_MONOTONIC
 #endif
 
-static inline long long get_time(void) {
-	struct timespec time;
-	long long nano_total;
-	 clock_gettime(CLOCK_ID, &time);
-	 nano_total = time.tv_sec;
-	 nano_total *= NANO_SCALE;
-	 nano_total += time.tv_nsec;
-	 return nano_total;
+static inline long long
+get_time(void)
+{
+        struct timespec time;
+        long long nano_total;
+        clock_gettime(CLOCK_ID, &time);
+        nano_total = time.tv_sec;
+        nano_total *= NANO_SCALE;
+        nano_total += time.tv_nsec;
+        return nano_total;
 }
 
-static inline long long get_res(void) {
-	struct timespec time;
-	long long nano_total;
-	clock_getres(CLOCK_ID, &time);
-	nano_total = time.tv_sec;
-	nano_total *= NANO_SCALE;
-	nano_total += time.tv_nsec;
-	return nano_total;
+static inline long long
+get_res(void)
+{
+        struct timespec time;
+        long long nano_total;
+        clock_getres(CLOCK_ID, &time);
+        nano_total = time.tv_sec;
+        nano_total *= NANO_SCALE;
+        nano_total += time.tv_nsec;
+        return nano_total;
 }
 #endif
 #endif
 struct perf {
-	long long start;
-	long long stop;
-	long long run_total;
-	long long iterations;
+        long long start;
+        long long stop;
+        long long run_total;
+        long long iterations;
 };
 
-static inline void perf_init(struct perf *p) {
-	p->start = 0;
-	p->stop = 0;
-	p->run_total = 0;
+static inline void
+perf_init(struct perf *p)
+{
+        p->start = 0;
+        p->stop = 0;
+        p->run_total = 0;
 }
 
-static inline void perf_continue(struct perf *p) {
-	p->start = get_time();
+static inline void
+perf_continue(struct perf *p)
+{
+        p->start = get_time();
 }
 
-static inline void perf_pause(struct perf *p) {
-	p->stop = get_time();
-	p->run_total = p->run_total + p->stop - p->start;
-	p->start = p->stop;
+static inline void
+perf_pause(struct perf *p)
+{
+        p->stop = get_time();
+        p->run_total = p->run_total + p->stop - p->start;
+        p->start = p->stop;
 }
 
-static inline void perf_start(struct perf *p) {
-	perf_init(p);
-	perf_continue(p);
+static inline void
+perf_start(struct perf *p)
+{
+        perf_init(p);
+        perf_continue(p);
 }
 
-static inline void perf_stop(struct perf *p) {
-	perf_pause(p);
+static inline void
+perf_stop(struct perf *p)
+{
+        perf_pause(p);
 }
 
-static inline double get_time_elapsed(struct perf *p) {
-	return 1.0 * p->run_total / UNIT_SCALE;
+static inline double
+get_time_elapsed(struct perf *p)
+{
+        return 1.0 * p->run_total / UNIT_SCALE;
 }
 
-static inline long long get_base_elapsed(struct perf *p) {
-	return p->run_total;
+static inline long long
+get_base_elapsed(struct perf *p)
+{
+        return p->run_total;
 }
 
-static inline unsigned long long estimate_perf_iterations(struct perf *p,
-						   unsigned long long runs,
-						   unsigned long long total) {
-	total = total * runs;
-	if (get_base_elapsed(p) > 0)
-		return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p);
-	else
-		return (total + get_res() - 1) / get_res();
+static inline unsigned long long
+estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
+{
+        total = total * runs;
+        if (get_base_elapsed(p) > 0)
+                return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p);
+        else
+                return (total + get_res() - 1) / get_res();
 }
 
-#define CALIBRATE(PERF, FUNC_CALL) {				\
-	unsigned long long _i, _iter = 1;			\
-	perf_start(PERF);					\
-	FUNC_CALL;						\
-	perf_pause(PERF);					\
-								\
-	while (get_base_elapsed(PERF) < CALIBRATE_TIME) {	\
-		_iter = estimate_perf_iterations(PERF, _iter,	\
-						2 * CALIBRATE_TIME);	\
-		perf_start(PERF);				\
-		for (_i = 0; _i < _iter; _i++) {		\
-			FUNC_CALL;				\
-		}						\
-		perf_stop(PERF);				\
-	}							\
-	(PERF)->iterations=_iter;				\
-}
+#define CALIBRATE(PERF, FUNC_CALL)                                                                 \
+        {                                                                                          \
+                unsigned long long _i, _iter = 1;                                                  \
+                perf_start(PERF);                                                                  \
+                FUNC_CALL;                                                                         \
+                perf_pause(PERF);                                                                  \
+                                                                                                   \
+                while (get_base_elapsed(PERF) < CALIBRATE_TIME) {                                  \
+                        _iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME);         \
+                        perf_start(PERF);                                                          \
+                        for (_i = 0; _i < _iter; _i++) {                                           \
+                                FUNC_CALL;                                                         \
+                        }                                                                          \
+                        perf_stop(PERF);                                                           \
+                }                                                                                  \
+                (PERF)->iterations = _iter;                                                        \
+        }
 
-#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) {		\
-	unsigned long long _i, _iter = (PERF)->iterations;	\
-	unsigned long long _run_total = RUN_TIME;		\
-	_run_total *= UNIT_SCALE;				\
-	_iter = estimate_perf_iterations(PERF, _iter, _run_total);\
-	(PERF)->iterations = 0;					\
-	perf_start(PERF);					\
-	for (_i = 0; _i < _iter; _i++) {			\
-		FUNC_CALL;					\
-	}							\
-	perf_pause(PERF);					\
-	(PERF)->iterations += _iter;				\
-								\
-	if(get_base_elapsed(PERF) < _run_total &&		\
-		BENCHMARK_TYPE == BENCHMARK_MIN_TIME) {		\
-		_iter = estimate_perf_iterations(PERF, _iter,	\
-			_run_total - get_base_elapsed(PERF) +	\
-			(UNIT_SCALE / 16));			\
-		perf_continue(PERF);				\
-		for (_i = 0; _i < _iter; _i++) {		\
-			FUNC_CALL;				\
-		}						\
-		perf_pause(PERF);				\
-		(PERF)->iterations += _iter;			\
-	}							\
-}
+#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)                                                \
+        {                                                                                          \
+                unsigned long long _i, _iter = (PERF)->iterations;                                 \
+                unsigned long long _run_total = RUN_TIME;                                          \
+                _run_total *= UNIT_SCALE;                                                          \
+                _iter = estimate_perf_iterations(PERF, _iter, _run_total);                         \
+                (PERF)->iterations = 0;                                                            \
+                perf_start(PERF);                                                                  \
+                for (_i = 0; _i < _iter; _i++) {                                                   \
+                        FUNC_CALL;                                                                 \
+                }                                                                                  \
+                perf_pause(PERF);                                                                  \
+                (PERF)->iterations += _iter;                                                       \
+                                                                                                   \
+                if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
+                        _iter = estimate_perf_iterations(PERF, _iter,                              \
+                                                         _run_total - get_base_elapsed(PERF) +     \
+                                                                 (UNIT_SCALE / 16));               \
+                        perf_continue(PERF);                                                       \
+                        for (_i = 0; _i < _iter; _i++) {                                           \
+                                FUNC_CALL;                                                         \
+                        }                                                                          \
+                        perf_pause(PERF);                                                          \
+                        (PERF)->iterations += _iter;                                               \
+                }                                                                                  \
+        }
 
-#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) {			\
-	if((RUN_TIME) > 0) {					\
-		CALIBRATE(PERF, FUNC_CALL);			\
-		PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);	\
-								\
-	} else {						\
-		(PERF)->iterations = 1;				\
-		perf_start(PERF);				\
-		FUNC_CALL;					\
-		perf_stop(PERF);				\
-	}							\
-}
+#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL)                                                       \
+        {                                                                                          \
+                if ((RUN_TIME) > 0) {                                                              \
+                        CALIBRATE(PERF, FUNC_CALL);                                                \
+                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);                               \
+                                                                                                   \
+                } else {                                                                           \
+                        (PERF)->iterations = 1;                                                    \
+                        perf_start(PERF);                                                          \
+                        FUNC_CALL;                                                                 \
+                        perf_stop(PERF);                                                           \
+                }                                                                                  \
+        }
 
 #ifdef USE_CYCLES
-static inline void perf_print(struct perf p, long long unit_count) {
-	long long total_units = p.iterations * unit_count;
+static inline void
+perf_print(struct perf p, long long unit_count)
+{
+        long long total_units = p.iterations * unit_count;
 
-	printf("runtime = %10lld ticks", get_base_elapsed(&p));
-	if (total_units != 0) {
-		printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
-		       total_units / (1000000), get_time_elapsed(&p),
-		       get_base_elapsed(&p) / (double)total_units);
-	}
-	printf("\n");
+        printf("runtime = %10lld ticks", get_base_elapsed(&p));
+        if (total_units != 0) {
+                printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
+                       get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
+        }
+        printf("\n");
 }
 #else
-static inline void perf_print(struct perf p, double unit_count) {
-	long long total_units = p.iterations * unit_count;
-	long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
+static inline void
+perf_print(struct perf p, double unit_count)
+{
+        long long total_units = p.iterations * unit_count;
+        long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
 
-	printf("runtime = %10lld usecs", usecs);
-	if (total_units != 0) {
-		printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
-		       total_units / (1000000), get_time_elapsed(&p),
-		       ((double)total_units) / (1000000 * get_time_elapsed(&p)));
-	}
-	printf("\n");
+        printf("runtime = %10lld usecs", usecs);
+        if (total_units != 0) {
+                printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
+                       get_time_elapsed(&p),
+                       ((double) total_units) / (1000000 * get_time_elapsed(&p)));
+        }
+        printf("\n");
 }
 #endif
 
-static inline uint64_t get_filesize(FILE * fp) {
-	uint64_t file_size;
-	fpos_t pos, pos_curr;
+static inline uint64_t
+get_filesize(FILE *fp)
+{
+        uint64_t file_size;
+        fpos_t pos, pos_curr;
 
-	fgetpos(fp, &pos_curr);	/* Save current position */
+        fgetpos(fp, &pos_curr); /* Save current position */
 #if defined(_WIN32) || defined(_WIN64)
-	_fseeki64(fp, 0, SEEK_END);
+        _fseeki64(fp, 0, SEEK_END);
 #else
-	fseeko(fp, 0, SEEK_END);
+        fseeko(fp, 0, SEEK_END);
 #endif
-	fgetpos(fp, &pos);
-	file_size = *(uint64_t *) & pos;
-	fsetpos(fp, &pos_curr);	/* Restore position */
+        fgetpos(fp, &pos);
+        file_size = *(uint64_t *) &pos;
+        fsetpos(fp, &pos_curr); /* Restore position */
 
-	return file_size;
+        return file_size;
 }
 
 #ifdef __cplusplus
diff --git a/include/unaligned.h b/include/unaligned.h
index 002cb32..e67bed2 100644
--- a/include/unaligned.h
+++ b/include/unaligned.h
@@ -37,168 +37,188 @@
 #ifdef __FreeBSD__
 #include <sys/types.h>
 #include <sys/endian.h>
-# define isal_bswap16(x) bswap16(x)
-# define isal_bswap32(x) bswap32(x)
-# define isal_bswap64(x) bswap64(x)
-#elif defined (__APPLE__)
+#define isal_bswap16(x) bswap16(x)
+#define isal_bswap32(x) bswap32(x)
+#define isal_bswap64(x) bswap64(x)
+#elif defined(__APPLE__)
 #include <libkern/OSByteOrder.h>
-# define isal_bswap16(x) OSSwapInt16(x)
-# define isal_bswap32(x) OSSwapInt32(x)
-# define isal_bswap64(x) OSSwapInt64(x)
-#elif defined (__GNUC__) && !defined (__MINGW32__)
-# include <byteswap.h>
-# define isal_bswap16(x) bswap_16(x)
-# define isal_bswap32(x) bswap_32(x)
-# define isal_bswap64(x) bswap_64(x)
+#define isal_bswap16(x) OSSwapInt16(x)
+#define isal_bswap32(x) OSSwapInt32(x)
+#define isal_bswap64(x) OSSwapInt64(x)
+#elif defined(__GNUC__) && !defined(__MINGW32__)
+#include <byteswap.h>
+#define isal_bswap16(x) bswap_16(x)
+#define isal_bswap32(x) bswap_32(x)
+#define isal_bswap64(x) bswap_64(x)
 #elif defined _WIN64
-# define isal_bswap16(x) _byteswap_ushort(x)
-# define isal_bswap32(x) _byteswap_ulong(x)
-# define isal_bswap64(x) _byteswap_uint64(x)
+#define isal_bswap16(x) _byteswap_ushort(x)
+#define isal_bswap32(x) _byteswap_ulong(x)
+#define isal_bswap64(x) _byteswap_uint64(x)
 #endif
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define to_be16(x) isal_bswap16(x)
-# define from_be16(x) isal_bswap16(x)
-# define to_be32(x) isal_bswap32(x)
-# define from_be32(x) isal_bswap32(x)
-# define to_be64(x) isal_bswap64(x)
-# define from_be64(x) isal_bswap64(x)
-# define to_le16(x) (x)
-# define from_le16(x) (x)
-# define to_le32(x) (x)
-# define from_le32(x) (x)
-# define to_le64(x) (x)
-# define from_le64(x) (x)
+#define to_be16(x)   isal_bswap16(x)
+#define from_be16(x) isal_bswap16(x)
+#define to_be32(x)   isal_bswap32(x)
+#define from_be32(x) isal_bswap32(x)
+#define to_be64(x)   isal_bswap64(x)
+#define from_be64(x) isal_bswap64(x)
+#define to_le16(x)   (x)
+#define from_le16(x) (x)
+#define to_le32(x)   (x)
+#define from_le32(x) (x)
+#define to_le64(x)   (x)
+#define from_le64(x) (x)
 #else
-# define to_be16(x) (x)
-# define from_be16(x) (x)
-# define to_be32(x) (x)
-# define from_be32(x) (x)
-# define to_be64(x) (x)
-# define from_be64(x) (x)
-# define to_le16(x) isal_bswap16(x)
-# define from_le16(x) isal_bswap16(x)
-# define to_le32(x) isal_bswap32(x)
-# define from_le32(x) isal_bswap32(x)
-# define to_le64(x) isal_bswap64(x)
-# define from_le64(x) isal_bswap64(x)
+#define to_be16(x)   (x)
+#define from_be16(x) (x)
+#define to_be32(x)   (x)
+#define from_be32(x) (x)
+#define to_be64(x)   (x)
+#define from_be64(x) (x)
+#define to_le16(x)   isal_bswap16(x)
+#define from_le16(x) isal_bswap16(x)
+#define to_le32(x)   isal_bswap32(x)
+#define from_le32(x) isal_bswap32(x)
+#define to_le64(x)   isal_bswap64(x)
+#define from_le64(x) isal_bswap64(x)
 #endif
 
-static inline uint16_t load_native_u16(uint8_t * buf)
+static inline uint16_t
+load_native_u16(uint8_t *buf)
 {
-	uint16_t ret;
-	memcpy(&ret, buf, sizeof(ret));
-	return ret;
+        uint16_t ret;
+        memcpy(&ret, buf, sizeof(ret));
+        return ret;
 }
 
-static inline uint16_t load_le_u16(uint8_t * buf)
+static inline uint16_t
+load_le_u16(uint8_t *buf)
 {
-	return from_le16(load_native_u16(buf));
+        return from_le16(load_native_u16(buf));
 }
 
-static inline uint16_t load_be_u16(uint8_t * buf)
+static inline uint16_t
+load_be_u16(uint8_t *buf)
 {
-	return from_be16(load_native_u16(buf));
+        return from_be16(load_native_u16(buf));
 }
 
-static inline uint32_t load_native_u32(uint8_t * buf)
+static inline uint32_t
+load_native_u32(uint8_t *buf)
 {
-	uint32_t ret;
-	memcpy(&ret, buf, sizeof(ret));
-	return ret;
+        uint32_t ret;
+        memcpy(&ret, buf, sizeof(ret));
+        return ret;
 }
 
-static inline uint32_t load_le_u32(uint8_t * buf)
+static inline uint32_t
+load_le_u32(uint8_t *buf)
 {
-	return from_le32(load_native_u32(buf));
+        return from_le32(load_native_u32(buf));
 }
 
-static inline uint32_t load_be_u32(uint8_t * buf)
+static inline uint32_t
+load_be_u32(uint8_t *buf)
 {
-	return from_be32(load_native_u32(buf));
+        return from_be32(load_native_u32(buf));
 }
 
-static inline uint64_t load_native_u64(uint8_t * buf)
+static inline uint64_t
+load_native_u64(uint8_t *buf)
 {
-	uint64_t ret;
-	memcpy(&ret, buf, sizeof(ret));
-	return ret;
+        uint64_t ret;
+        memcpy(&ret, buf, sizeof(ret));
+        return ret;
 }
 
-static inline uint64_t load_le_u64(uint8_t * buf)
+static inline uint64_t
+load_le_u64(uint8_t *buf)
 {
-	return from_le64(load_native_u64(buf));
+        return from_le64(load_native_u64(buf));
 }
 
-static inline uint64_t load_be_u64(uint8_t * buf)
+static inline uint64_t
+load_be_u64(uint8_t *buf)
 {
-	return from_be64(load_native_u64(buf));
+        return from_be64(load_native_u64(buf));
 }
 
-static inline uintmax_t load_le_umax(uint8_t * buf)
+static inline uintmax_t
+load_le_umax(uint8_t *buf)
 {
-	switch (sizeof(uintmax_t)) {
-	case sizeof(uint32_t):
-		return from_le32(load_native_u32(buf));
-	case sizeof(uint64_t):
-		return from_le64(load_native_u64(buf));
-	default:
-		return 0;
-	}
+        switch (sizeof(uintmax_t)) {
+        case sizeof(uint32_t):
+                return from_le32(load_native_u32(buf));
+        case sizeof(uint64_t):
+                return from_le64(load_native_u64(buf));
+        default:
+                return 0;
+        }
 }
 
-static inline void store_native_u16(uint8_t * buf, uint16_t val)
+static inline void
+store_native_u16(uint8_t *buf, uint16_t val)
 {
-	memcpy(buf, &val, sizeof(val));
+        memcpy(buf, &val, sizeof(val));
 }
 
-static inline void store_le_u16(uint8_t * buf, uint16_t val)
+static inline void
+store_le_u16(uint8_t *buf, uint16_t val)
 {
-	store_native_u16(buf, to_le16(val));
+        store_native_u16(buf, to_le16(val));
 }
 
-static inline void store_be_u16(uint8_t * buf, uint16_t val)
+static inline void
+store_be_u16(uint8_t *buf, uint16_t val)
 {
-	store_native_u16(buf, to_be16(val));
+        store_native_u16(buf, to_be16(val));
 }
 
-static inline void store_native_u16_to_u64(uint64_t * buf, uint16_t val)
+static inline void
+store_native_u16_to_u64(uint64_t *buf, uint16_t val)
 {
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-	store_native_u16((uint8_t *) buf, val);
+        store_native_u16((uint8_t *) buf, val);
 #else
-	store_native_u16((uint8_t *) buf + 6, val);
+        store_native_u16((uint8_t *) buf + 6, val);
 #endif
 }
 
-static inline void store_native_u32(uint8_t * buf, uint32_t val)
+static inline void
+store_native_u32(uint8_t *buf, uint32_t val)
 {
-	memcpy(buf, &val, sizeof(val));
+        memcpy(buf, &val, sizeof(val));
 }
 
-static inline void store_le_u32(uint8_t * buf, uint32_t val)
+static inline void
+store_le_u32(uint8_t *buf, uint32_t val)
 {
-	store_native_u32(buf, to_le32(val));
+        store_native_u32(buf, to_le32(val));
 }
 
-static inline void store_be_u32(uint8_t * buf, uint32_t val)
+static inline void
+store_be_u32(uint8_t *buf, uint32_t val)
 {
-	store_native_u32(buf, to_be32(val));
+        store_native_u32(buf, to_be32(val));
 }
 
-static inline void store_native_u64(uint8_t * buf, uint64_t val)
+static inline void
+store_native_u64(uint8_t *buf, uint64_t val)
 {
-	memcpy(buf, &val, sizeof(val));
+        memcpy(buf, &val, sizeof(val));
 }
 
-static inline void store_le_u64(uint8_t * buf, uint64_t val)
+static inline void
+store_le_u64(uint8_t *buf, uint64_t val)
 {
-	store_native_u64(buf, to_le64(val));
+        store_native_u64(buf, to_le64(val));
 }
 
-static inline void store_be_u64(uint8_t * buf, uint64_t val)
+static inline void
+store_be_u64(uint8_t *buf, uint64_t val)
 {
-	store_native_u64(buf, to_be64(val));
+        store_native_u64(buf, to_be64(val));
 }
 
 #endif