From 4775dee66b13287db44790c96b98764f196d53bf Mon Sep 17 00:00:00 2001
From: Corey Farwell
Date: Sun, 20 May 2018 11:55:54 -0400
Subject: [PATCH] Clarify in the docs that `mul_add` is not always faster.

More info:
- https://github.com/rust-lang/rust/issues/49842
- https://github.com/rust-lang/rust/pull/50572
---
 src/float.rs       | 6 ++++--
 src/ops/mul_add.rs | 9 +++++----
 src/real.rs        | 6 ++++--
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/float.rs b/src/float.rs
index 8df3254..4320543 100644
--- a/src/float.rs
+++ b/src/float.rs
@@ -1237,8 +1237,10 @@ pub trait Float
     fn is_sign_negative(self) -> bool;
 
     /// Fused multiply-add. Computes `(self * a) + b` with only one rounding
-    /// error. This produces a more accurate result with better performance than
-    /// a separate multiplication operation followed by an add.
+    /// error, yielding a more accurate result than an unfused multiply-add.
+    ///
+    /// Using `mul_add` can be more performant than an unfused multiply-add if
+    /// the target architecture has a dedicated `fma` CPU instruction.
     ///
     /// ```
     /// use num_traits::Float;
diff --git a/src/ops/mul_add.rs b/src/ops/mul_add.rs
index f7cb71f..6e43f2f 100644
--- a/src/ops/mul_add.rs
+++ b/src/ops/mul_add.rs
@@ -1,7 +1,8 @@
-/// The fused multiply-add operation.
-/// Computes (self * a) + b with only one rounding error.
-/// This produces a more accurate result with better performance
-/// than a separate multiplication operation followed by an add.
+/// Fused multiply-add. Computes `(self * a) + b` with only one rounding
+/// error, yielding a more accurate result than an unfused multiply-add.
+///
+/// Using `mul_add` can be more performant than an unfused multiply-add if
+/// the target architecture has a dedicated `fma` CPU instruction.
 ///
 /// Note that `A` and `B` are `Self` by default, but this is not mandatory.
 ///
diff --git a/src/real.rs b/src/real.rs
index fb6714a..8c713d3 100644
--- a/src/real.rs
+++ b/src/real.rs
@@ -215,8 +215,10 @@ pub trait Real
     fn is_sign_negative(self) -> bool;
 
     /// Fused multiply-add. Computes `(self * a) + b` with only one rounding
-    /// error. This produces a more accurate result with better performance than
-    /// a separate multiplication operation followed by an add.
+    /// error, yielding a more accurate result than an unfused multiply-add.
+    ///
+    /// Using `mul_add` can be more performant than an unfused multiply-add if
+    /// the target architecture has a dedicated `fma` CPU instruction.
     ///
     /// ```
     /// use num_traits::real::Real;