2009年12月15日
情報科学類 オペレーティングシステム II
                                       筑波大学 システム情報工学研究科 
                                       コンピュータサイエンス専攻, 電子・情報工学系
                                       新城 靖
                                       <yas@is.tsukuba.ac.jp>
このページは、次の URL にあります。
	http://www.coins.tsukuba.ac.jp/~yas/coins/literacy-2009/2009-12-15
あるいは、次のページから手繰っていくこともできます。
	http://www.coins.tsukuba.ac.jp/~yas/
	http://www.cs.tsukuba.ac.jp/~yas/
| 表示 | 説明 | 
| NI | Nice。優先度を表す値。 | 
% ps l 
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013  5463  5462  15   0  91484  3636 rt_sig Ss   pts/3      0:00 -tcsh
0  1013  8378  5463  15   0  51236  2648 -      S    pts/3      0:00 xterm
0  1013  8380  8378  15   0  91188  3208 -      Ss+  pts/4      0:00 -csh
0  1013  9172  5463  16   0   8368   748 -      R+   pts/3      0:00 ps l
% /bin/nice  ps l 
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013  5463  5462  15   0  91484  3636 rt_sig Ss   pts/3      0:00 -tcsh
0  1013  8378  5463  15   0  51236  2648 -      S    pts/3      0:00 xterm
0  1013  8380  8378  15   0  91188  3208 -      Ss+  pts/4      0:00 -csh
0  1013  9175  5463  26  10   8368   748 -      RN+  pts/3      0:00 ps l
% 
   1:	/*
   2:	        getpriority-pid.c -- 優先度の表示
   3:	        ~yas/syspro/proc/getpriority-pid.c
   4:	        Created on: 2009/12/14 12:15:11
   5:	*/
   6:	
   7:	#include <stdio.h>              /* stderr, fprintf() */
   8:	#include <sys/time.h>           /* getpriority() */
   9:	#include <sys/resource.h>       /* getpriority() */
  10:	#include <stdlib.h>             /* strtol() */
  11:	#include <limits.h>             /* strtol() */
  12:	
  13:	main( int argc, char *argv[] )
  14:	{
  15:	    int which, who, prio;
  16:	    pid_t pid;
  17:	        if( argc != 2 )
  18:	        {
  19:	            fprintf(stderr,"Usage: %% %s pid\n",argv[0] );
  20:	            exit( 1 );
  21:	        }
  22:	        pid = strtol( argv[1], NULL, 10 );
  23:	        prio = getpriority( PRIO_PROCESS, pid );
  24:	        printf("pid==%d, priority==%d\n", pid, prio);
  25:	}
% echo $$ 
5463
% ./getpriority-pid $$ 
pid==5463, priority==0
% ./getpriority-pid 0 
pid==0, priority==0
% /bin/nice -20 ./getpriority-pid 0 
pid==0, priority==19
% 
include/linux/sched.h
1166: struct task_struct {
1167:         volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
...
1181:         int prio, static_prio, normal_prio;
1182:         unsigned int rt_priority;
1183:         const struct sched_class *sched_class;
1184:         struct sched_entity se;
1185:         struct sched_rt_entity rt;
...
1206:         unsigned int policy;
...
1483: };
1089: struct sched_entity {
...
1091:         struct rb_node          run_node;
...
1093:         unsigned int            on_rq;
...
1097:         u64                     vruntime;
...
1148: };
struct task_struct の中に、struct sched_entity がある。
35: #define SCHED_NORMAL 0 36: #define SCHED_FIFO 1 37: #define SCHED_RR 2 38: #define SCHED_BATCH 3 39: /* SCHED_ISO: reserved but not implemented yet */ 40: #define SCHED_IDLE 5
kernel/sys.c 
 207: /*
 208:  * Ugh. To avoid negative return values, "getpriority()" will
 209:  * not return the normal nice-value, but a negated value that
 210:  * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 211:  * to stay compatible.
 212:  */
 213: SYSCALL_DEFINE2(getpriority, int, which, int, who)
 214: {
 215:         struct task_struct *g, *p;
 216:         struct user_struct *user;
 217:         const struct cred *cred = current_cred();
 218:         long niceval, retval = -ESRCH;
 219:         struct pid *pgrp;
 220: 
 221:         if (which > PRIO_USER || which < PRIO_PROCESS)
 222:                 return -EINVAL;
 223: 
 224:         read_lock(&tasklist_lock);
 225:         switch (which) {
 226:                 case PRIO_PROCESS:
 227:                         if (who)
 228:                                 p = find_task_by_vpid(who);
 229:                         else
 230:                                 p = current;
 231:                         if (p) {
 232:                                 niceval = 20 - task_nice(p);
 233:                                 if (niceval > retval)
 234:                                         retval = niceval;
 235:                         }
 236:                         break;
 237:                 case PRIO_PGRP:
...
 248:                 case PRIO_USER:
...
 266:         }
 267: out_unlock:
 268:         read_unlock(&tasklist_lock);
 269: 
 270:         return retval;
 271: }
include/linux/sched.h:
1501: #define MAX_USER_RT_PRIO        100
1502: #define MAX_RT_PRIO             MAX_USER_RT_PRIO
1503: 
1504: #define MAX_PRIO                (MAX_RT_PRIO + 40)
1505: #define DEFAULT_PRIO            (MAX_RT_PRIO + 20)
kernel/sched.c
  89: #define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
  90: #define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)
  91: #define TASK_NICE(p)            PRIO_TO_NICE((p)->static_prio)
...
6044: int task_nice(const struct task_struct *p)
6045: {
6046:         return TASK_NICE(p);
6047: }
6048: EXPORT_SYMBOL(task_nice);
1368: static const int prio_to_weight[40] = {
1369:  /* -20 */     88761,     71755,     56483,     46273,     36291,
1370:  /* -15 */     29154,     23254,     18705,     14949,     11916,
1371:  /* -10 */      9548,      7620,      6100,      4904,      3906,
1372:  /*  -5 */      3121,      2501,      1991,      1586,      1277,
1373:  /*   0 */      1024,       820,       655,       526,       423,
1374:  /*   5 */       335,       272,       215,       172,       137,
1375:  /*  10 */       110,        87,        70,        56,        45,
1376:  /*  15 */        36,        29,        23,        18,        15,
1377: };
1757: static void set_load_weight(struct task_struct *p)
1758: {
...
1774:         p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
1775:         p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
1776: }
優先度 static_prio は、struct sched_entity se の se.load.weight とその逆数
se.load.inv_weight に反映される。se.load.{weight,inv_weight} の値は、
後に、vruntime の計算の重みづけに使われる。
| 名前 | 説明 | 
|---|---|
| enqueue_task | プロセスが実行可能(runnable)になった | 
| dequeue_task | プロセスが実行可能ではなくなった | 
| yield_task | CPUを譲る。dequeueしてenqueue | 
| check_preempt_curr | 実行可能になった時にCPUを横取りすべきかをチェック | 
| pick_next_task | 次に実行すべきプロセスを選ぶ | 
| set_curr_task | スケジューリング・クラスが変更された | 
| task_tick | タイマ割込み(tick)の時に呼ばれる | 
| task_new | 新しいプロセスが生成された | 
1784: static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
1785: {
...
1790:         p->sched_class->enqueue_task(rq, p, wakeup);
1791:         p->se.on_rq = 1;
1792: }
1793: 
1794: static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
1795: {
...
1808:         p->sched_class->dequeue_task(rq, p, sleep);
1809:         p->se.on_rq = 0;
1810: }
kernel/sched.c
6078: static void
6079: __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
6080: {
6081:         BUG_ON(p->se.on_rq);
6082: 
6083:         p->policy = policy;
6084:         switch (p->policy) {
6085:         case SCHED_NORMAL:
6086:         case SCHED_BATCH:
6087:         case SCHED_IDLE:
6088:                 p->sched_class = &fair_sched_class;
6089:                 break;
6090:         case SCHED_FIFO:
6091:         case SCHED_RR:
6092:                 p->sched_class = &rt_sched_class;
6093:                 break;
6094:         }
6095: 
6096:         p->rt_priority = prio;
6097:         p->normal_prio = normal_prio(p);
6098:         /* we are holding p->pi_lock already */
6099:         p->prio = rt_mutex_getprio(p);
6100:         set_load_weight(p);
6101: }

図? runqueueの構造
kernel/sched.c
 565: struct rq {
...
 586:         struct cfs_rq cfs;
 587:         struct rt_rq rt;
...
 664: };
 420: struct cfs_rq {
...
 427:         struct rb_root tasks_timeline;
 428:         struct rb_node *rb_leftmost;
 429: 
 430:         struct list_head tasks;
...
 480: };
...
 666: static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
...
9068: static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
9069: {
9070:         cfs_rq->tasks_timeline = RB_ROOT;
...
9076: }

図? runqueueの構造(red-black tree)
kernel/sched_fair.c
 304: static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 305: {
 306:         struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 307:         struct rb_node *parent = NULL;
 308:         struct sched_entity *entry;
 309:         s64 key = entity_key(cfs_rq, se);
 310:         int leftmost = 1;
 311: 
 312:         /*
 313:          * Find the right place in the rbtree:
 314:          */
 315:         while (*link) {
 316:                 parent = *link;
 317:                 entry = rb_entry(parent, struct sched_entity, run_node);
 318:                 /*
 319:                  * We dont care about collisions. Nodes with
 320:                  * the same key stay together.
 321:                  */
 322:                 if (key < entity_key(cfs_rq, entry)) {
 323:                         link = &parent->rb_left;
 324:                 } else {
 325:                         link = &parent->rb_right;
 326:                         leftmost = 0;
 327:                 }
 328:         }
 329: 
 330:         /*
 331:          * Maintain a cache of leftmost tree entries (it is frequently
 332:          * used):
 333:          */
 334:         if (leftmost)
 335:                 cfs_rq->rb_leftmost = &se->run_node;
 336: 
 337:         rb_link_node(&se->run_node, parent, link);
 338:         rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
 339: }
...
 275: static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 276: {
 277:         return se->vruntime - cfs_rq->min_vruntime;
 278: }
kernel/sched.c
1079: static enum hrtimer_restart hrtick(struct hrtimer *timer)
1080: {
1081:         struct rq *rq = container_of(timer, struct rq, hrtick_timer);
...
1086:         update_rq_clock(rq);
1087:         rq->curr->sched_class->task_tick(rq, rq->curr, 1);
...
1091: }
kernel/sched_fair.c
1697: static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1698: {
1699:         struct cfs_rq *cfs_rq;
1700:         struct sched_entity *se = &curr->se;
1701: 
1702:         for_each_sched_entity(se) {
1703:                 cfs_rq = cfs_rq_of(se);
1704:                 entity_tick(cfs_rq, se, queued);
1705:         }
1706: }
...
 866: entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 867: {
 871:         update_curr(cfs_rq);
...
 890:         if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
 891:                 check_preempt_tick(cfs_rq, curr);
 892: }
...
 484: static void update_curr(struct cfs_rq *cfs_rq)
 485: {
 486:         struct sched_entity *curr = cfs_rq->curr;
 487:         u64 now = rq_of(cfs_rq)->clock;
 488:         unsigned long delta_exec;
...
 498:         delta_exec = (unsigned long)(now - curr->exec_start);
...
 502:         __update_curr(cfs_rq, curr, delta_exec);
 503:         curr->exec_start = now;
 511: }
...
 469: static inline void
 470: __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 471:               unsigned long delta_exec)
 472: {
 473:         unsigned long delta_exec_weighted;
 474: 
 475:         schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 476: 
 477:         curr->sum_exec_runtime += delta_exec;
 478:         schedstat_add(cfs_rq, exec_clock, delta_exec);
 479:         delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 480:         curr->vruntime += delta_exec_weighted;
 481:         update_min_vruntime(cfs_rq);
 482: }

図? 4つの要素を持つリスト構造
同じ優先度を持つ要素を、二分探索木を用いて保持するとどうなるか。節と枝 (矢印)を用いて図示しなさい。ただし、木はバランスをしていなくても良い ものとする。注意: 答えは1つではない。