✨ S_CURVE_FACTOR (#27101)

2025-11-09 00:48:09 +02:00
parent 90363cefd0
commit f946b60670
4 changed files with 71 additions and 25 deletions
@@ -1395,6 +1395,11 @@
 * See https://github.com/synthetos/TinyG/wiki/Jerk-Controlled-Motion-Explained
 */
 //#define S_CURVE_ACCELERATION
+#if ENABLED(S_CURVE_ACCELERATION)
+  // Define to use 4th instead of 6th order motion curve
+  //#define S_CURVE_FACTOR 0.25    // Initial and final acceleration factor, ideally 0.1 to 0.4.
+                                   // Shouldn't generally require tuning.
+#endif

 //===========================================================================
 //============================= Z Probe Options =============================
@@ -866,6 +866,16 @@ static_assert(COUNT(arm) == LOGICAL_AXES, "AXIS_RELATIVE_MODES must contain " _L
  #endif
 #endif // LIN_ADVANCE

+/**
+ * S_CURVE_ACCELERATION
+ */
+#if ENABLED(S_CURVE_ACCELERATION) && defined(S_CURVE_FACTOR)
+  #ifdef __AVR__
+    #error "S_CURVE_FACTOR is not yet implemented for AVR. Disable it to continue."
+  #endif
+  static_assert(WITHIN(S_CURVE_FACTOR, 0, 1), "S_CURVE_FACTOR must be between 0.0 and 1.0.");
+#endif
+
 /**
 * Linear Advance and FT Motion - Check K value range
 */
@@ -661,7 +661,7 @@ void Stepper::apply_directions() {
   *  a "linear pop" velocity curve; with pop being the sixth derivative of position:
   *  velocity - 1st, acceleration - 2nd, jerk - 3rd, snap - 4th, crackle - 5th, pop - 6th
   *
-   *  The Bézier curve takes the form:
+   *  The 6th order Bézier curve takes the form:
   *
   *  V(t) = P_0 * B_0(t) + P_1 * B_1(t) + P_2 * B_2(t) + P_3 * B_3(t) + P_4 * B_4(t) + P_5 * B_5(t)
   *
@@ -681,7 +681,10 @@ void Stepper::apply_directions() {
   *  Unfortunately, we cannot use forward-differencing to calculate each position through
   *  the curve, as Marlin uses variable timer periods. So, we require a formula of the form:
   *
+   *  6th order:
   *        V_f(t) = A*t^5 + B*t^4 + C*t^3 + D*t^2 + E*t + F
+   *  4th order:
+   *        V_f(t) = A*t^3 + B*t^2 + C*t + F
   *
   *  Looking at the above B_0(t) through B_5(t) expanded forms, if we take the coefficients of t^5
   *  through t of the Bézier form of V(t), we can determine that:
@@ -704,15 +707,27 @@ void Stepper::apply_directions() {
   *        E = 0
   *        F = P_i
   *
+   *  For 4th order we want the initial and final acceleration to be a fixed S_CURVE_FACTOR fraction
+   *  and solving gives us:
+   *
+   *        A = 2*(1 - S_CURVE_FACTOR)*(P_i - P_t)
+   *        B = 3*(1 - S_CURVE_FACTOR)*(P_t - P_i)
+   *        C = S_CURVE_FACTOR*(P_t - P_i)
+   *        F = P_i
+   *
   *  As the t is evaluated in non uniform steps here, there is no other way rather than evaluating
   *  the Bézier curve at each point:
   *
+   *  6th order:
   *        V_f(t) = A*t^5 + B*t^4 + C*t^3 + F          [0 <= t <= 1]
+   *  4th order:
+   *        V_f(t) = A*t^3 + B*t^2 + C*t + F            [0 <= t <= 1]
   *
   * Floating point arithmetic execution time cost is prohibitive, so we will transform the math to
-   * use fixed point values to be able to evaluate it in realtime. Assuming a maximum of 250000 steps
-   * per second (driver pulses should at least be 2µS hi/2µS lo), and allocating 2 bits to avoid
-   * overflows on the evaluation of the Bézier curve, means we can use
+   * use fixed point values to be able to evaluate it in realtime.
+   *
+   * 6th order: Assumes a maximum of 250000 steps/s (driver pulses down to 2µS hi/2µS lo),
+   * and allocates 2 bits to avoid overflows on the evaluation of the Bézier curve:
   *
   *   t: unsigned Q0.32 (0 <= t < 1) |range 0 to 0xFFFFFFFF unsigned
   *   A:   signed Q24.7 ,            |range = +/- 250000 * 6 * 128 = +/- 192000000 = 0x0B71B000 | 28 bits + sign
@@ -720,6 +735,8 @@ void Stepper::apply_directions() {
   *   C:   signed Q24.7 ,            |range = +/- 250000 *10 * 128 = +/- 320000000 = 0x1312D000 | 29 bits + sign
   *   F:   signed Q24.7 ,            |range = +/- 250000     * 128 =      32000000 = 0x01E84800 | 25 bits + sign
   *
+   * 4th order: With B coefficient at least 5x smaller the maximum will be ~1250000 steps/s.
+   *
   * The trapezoid generator state contains the following information, that we will use to create and evaluate
   * the Bézier curve:
   *
@@ -734,9 +751,15 @@ void Stepper::apply_directions() {
   *
   *    At the start of each trapezoid, calculate the coefficients A,B,C,F and Advance [AV], as follows:
   *
+   *  6th order:
   *      A =  6*128*(VF - VI) =  768*(VF - VI)
   *      B = 15*128*(VI - VF) = 1920*(VI - VF)
   *      C = 10*128*(VF - VI) = 1280*(VF - VI)
+   *  4th order:
+   *      A =  2*(1-S_CURVE_FACTOR)*128*(VI - VF)
+   *      B =  3*(1-S_CURVE_FACTOR)*128*(VF - VI)
+   *      C =  S_CURVE_FACTOR*128*(VF - VI)
+   *  both:
   *      F =    128*VI        =  128*VI
   *     AV = (1<<32)/TS      ~= 0xFFFFFFFF / TS (To use ARM UDIV, that is 32 bits) (this is computed at the planner, to offload expensive calculations from the ISR)
   *
@@ -1385,9 +1408,17 @@ void Stepper::apply_directions() {
    // For all the other 32bit CPUs
    FORCE_INLINE void Stepper::_calc_bezier_curve_coeffs(const int32_t v0, const int32_t v1, const uint32_t av) {
      // Calculate the Bézier coefficients
-      bezier_A =  768 * (v1 - v0);
-      bezier_B = 1920 * (v0 - v1);
-      bezier_C = 1280 * (v1 - v0);
+      #ifndef S_CURVE_FACTOR
+        bezier_A =  768 * (v1 - v0);
+        bezier_B = 1920 * (v0 - v1);
+        bezier_C = 1280 * (v1 - v0);
+      #else
+        // Must convert from S_CURVE_FACTOR to int only once.
+        constexpr int FACTOR128 = S_CURVE_FACTOR * 128;
+        bezier_A = (2 * (128 - FACTOR128)) * (v0 - v1);
+        bezier_B = (3 * (128 - FACTOR128)) * (v1 - v0);
+        bezier_C = FACTOR128 * (v1 - v0);
+      #endif
      bezier_F =  128 * v0;
      bezier_AV = av;
    }
@@ -1409,8 +1440,10 @@ void Stepper::apply_directions() {
          ".syntax unified" "\n\t"              // is to prevent CM0,CM1 non-unified syntax
          A("lsrs  %[ahi],%[alo],#1")           // a  = F << 31      1 cycles
          A("lsls  %[alo],%[alo],#31")          //                   1 cycles
-          A("umull %[flo],%[fhi],%[fhi],%[t]")  // f *= t            5 cycles [fhi:flo=64bits]
-          A("umull %[flo],%[fhi],%[fhi],%[t]")  // f>>=32; f*=t      5 cycles [fhi:flo=64bits]
+          #ifndef S_CURVE_FACTOR
+            A("umull %[flo],%[fhi],%[fhi],%[t]")  // f *= t          5 cycles [fhi:flo=64bits]
+            A("umull %[flo],%[fhi],%[fhi],%[t]")  // f>>=32; f*=t    5 cycles [fhi:flo=64bits]
+          #endif
          A("lsrs  %[flo],%[fhi],#1")           //                   1 cycles [31bits]
          A("smlal %[alo],%[ahi],%[flo],%[C]")  // a+=(f>>33)*C;     5 cycles
          A("umull %[flo],%[fhi],%[fhi],%[t]")  // f>>=32; f*=t      5 cycles [fhi:flo=64bits]
@@ -1438,21 +1471,19 @@ void Stepper::apply_directions() {
        // For non ARM targets, we provide a fallback implementation. Really doubt it
        // will be useful, unless the processor is fast and 32bit

-        uint32_t t = bezier_AV * curr_step;               // t: Range 0 - 1^32 = 32 bits
-        uint64_t f = t;
-        f *= t;                                           // Range 32*2 = 64 bits (unsigned)
-        f >>= 32;                                         // Range 32 bits  (unsigned)
-        f *= t;                                           // Range 32*2 = 64 bits  (unsigned)
-        f >>= 32;                                         // Range 32 bits : f = t^3  (unsigned)
-        int64_t acc = (int64_t) bezier_F << 31;           // Range 63 bits (signed)
-        acc += ((uint32_t) f >> 1) * (int64_t) bezier_C;  // Range 29bits + 31 = 60bits (plus sign)
-        f *= t;                                           // Range 32*2 = 64 bits
-        f >>= 32;                                         // Range 32 bits : f = t^3  (unsigned)
-        acc += ((uint32_t) f >> 1) * (int64_t) bezier_B;  // Range 29bits + 31 = 60bits (plus sign)
-        f *= t;                                           // Range 32*2 = 64 bits
-        f >>= 32;                                         // Range 32 bits : f = t^3  (unsigned)
-        acc += ((uint32_t) f >> 1) * (int64_t) bezier_A;  // Range 28bits + 31 = 59bits (plus sign)
-        acc >>= (31 + 7);                                 // Range 24bits (plus sign)
+        uint32_t t = bezier_AV * curr_step;           // t: Range 32 bits
+        uint64_t f = t;                               // f: Range 64 bits
+        #ifndef S_CURVE_FACTOR
+          f *= t; f >>= 32;                           // f = t^2  (unsigned)
+          f *= t; f >>= 32;                           // f = t^3  (unsigned)
+        #endif
+        int64_t acc = int64_t(bezier_F) << 31;        // Range 63 bits (signed)
+        acc += uint32_t(f >> 1) * int64_t(bezier_C);  // Range 29bits + 31 = 60bits (plus sign)
+        f *= t; f >>= 32;                             // f = t^2 or t^4  (unsigned)
+        acc += uint32_t(f >> 1) * int64_t(bezier_B);  // Range 29bits + 31 = 60bits (plus sign)
+        f *= t; f >>= 32;                             // f = t^3 or t^5  (unsigned)
+        acc += uint32_t(f >> 1) * int64_t(bezier_A);  // Range 28bits + 31 = 59bits (plus sign)
+        acc >>= (31 + 7);                             // Range 24bits (plus sign)
        return (int32_t) acc;

      #endif
@@ -459,7 +459,7 @@ class Stepper {
      static int32_t bezier_A,     // A coefficient in Bézier speed curve
                     bezier_B,     // B coefficient in Bézier speed curve
                     bezier_C;     // C coefficient in Bézier speed curve
-      static uint32_t bezier_F,    // F coefficient in Bézier speed curve
+      static uint32_t bezier_F,    // F/free coefficient in Bézier speed curve
                      bezier_AV;   // AV coefficient in Bézier speed curve
      #ifdef __AVR__
        static bool A_negative;    // If A coefficient was negative