@@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
145145 time_elapsed = update_time - j_cdbs -> prev_update_time ;
146146 j_cdbs -> prev_update_time = update_time ;
147147
148- idle_time = cur_idle_time - j_cdbs -> prev_cpu_idle ;
148+ /*
149+ * cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if
150+ * it's obtained from get_cpu_idle_time_jiffy() when NOHZ is
151+ * off, where idle_time is calculated by the difference between
152+ * time elapsed in jiffies and "busy time" obtained from CPU
153+ * statistics. If a CPU is 100% busy, the time elapsed and busy
154+ * time should grow with the same amount in two consecutive
155+ * samples, but in practice there could be a tiny difference,
156+ * making the accumulated idle time decrease sometimes. Hence,
157+ * in this case, idle_time should be regarded as 0 in order to
158+ * make the further process correct.
159+ */
160+ if (cur_idle_time > j_cdbs -> prev_cpu_idle )
161+ idle_time = cur_idle_time - j_cdbs -> prev_cpu_idle ;
162+ else
163+ idle_time = 0 ;
164+
149165 j_cdbs -> prev_cpu_idle = cur_idle_time ;
150166
151167 if (ignore_nice ) {
@@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
162178 * calls, so the previous load value can be used then.
163179 */
164180 load = j_cdbs -> prev_load ;
165- } else if (unlikely (( int ) idle_time > 2 * sampling_rate &&
181+ } else if (unlikely (idle_time > 2 * sampling_rate &&
166182 j_cdbs -> prev_load )) {
167183 /*
168184 * If the CPU had gone completely idle and a task has
@@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
189205 load = j_cdbs -> prev_load ;
190206 j_cdbs -> prev_load = 0 ;
191207 } else {
192- if (time_elapsed >= idle_time ) {
208+ if (time_elapsed > idle_time )
193209 load = 100 * (time_elapsed - idle_time ) / time_elapsed ;
194- } else {
195- /*
196- * That can happen if idle_time is returned by
197- * get_cpu_idle_time_jiffy(). In that case
198- * idle_time is roughly equal to the difference
199- * between time_elapsed and "busy time" obtained
200- * from CPU statistics. Then, the "busy time"
201- * can end up being greater than time_elapsed
202- * (for example, if jiffies_64 and the CPU
203- * statistics are updated by different CPUs),
204- * so idle_time may in fact be negative. That
205- * means, though, that the CPU was busy all
206- * the time (on the rough average) during the
207- * last sampling interval and 100 can be
208- * returned as the load.
209- */
210- load = (int )idle_time < 0 ? 100 : 0 ;
211- }
210+ else
211+ load = 0 ;
212+
212213 j_cdbs -> prev_load = load ;
213214 }
214215
215- if (unlikely (( int ) idle_time > 2 * sampling_rate )) {
216+ if (unlikely (idle_time > 2 * sampling_rate )) {
216217 unsigned int periods = idle_time / sampling_rate ;
217218
218219 if (periods < idle_periods )
0 commit comments