2023年01月27日
情報科学類 オペレーティングシステム II
                                       筑波大学 システム情報系 
                                       新城 靖
                                       <yas@cs.tsukuba.ac.jp>
このページは、次の URL にあります。
	http://www.coins.tsukuba.ac.jp/~yas/coins/os2-2022/2023-01-27
あるいは、次のページから手繰っていくこともできます。
	http://www.coins.tsukuba.ac.jp/~yas/
	http://www.cs.tsukuba.ac.jp/~yas/
struct timeval {
    time_t      tv_sec;     /* seconds. long int */
    suseconds_t tv_usec;    /* microseconds. long int */
};
int gettimeofday(struct timeval *tp, struct timezone *tzp);
int settimeofday(const struct timeval *tp, const struct timezone *tzp);
使い方
   1:	/*
   2:	        gettimeofday-print.c -- get colander time and print
   3:	        Created on: 2014/01/22 20:40:34
   4:	*/
   5:	
   6:	#include <sys/time.h>   /* gettimeofday() */
   7:	#include <time.h>       /* ctime() */
   8:	#include <stdio.h>
   9:	
  10:	main()
  11:	{
  12:	    struct timeval tv;
  13:	    time_t sec;
  14:	        gettimeofday( &tv, NULL );
  15:	        sec = tv.tv_sec;
  16:	        printf("%s", ctime(&sec) );
  17:	}
$ make gettimeofday-print ![[←]](../icons/screen-return.gif) cc     gettimeofday-print.c   -o gettimeofday-print
$ ./gettimeofday-print
cc     gettimeofday-print.c   -o gettimeofday-print
$ ./gettimeofday-print ![[←]](../icons/screen-return.gif) Tue Jan 24 18:12:50 2023
$ date
Tue Jan 24 18:12:50 2023
$ date ![[←]](../icons/screen-return.gif) Tue Jan 24 18:12:52 JST 2023
$
Tue Jan 24 18:12:52 JST 2023
$ ![[]](../icons/screen-cursor.gif) 
POSIX 1003.1, 2003 の
struct timespec
では、ナノ秒単位。
struct timespec  {
    time_t tv_sec;            /* Seconds.  */
    long int tv_nsec;         /* Nanoseconds.  */
};
int clock_settime(clockid_t clock_id, const struct timespec *tp);
int clock_gettime(clockid_t clock_id, struct timespec *tp);
int clock_getres(clockid_t clock_id, struct timespec *res);
clock_id としては、CLOCK_REALTIME (カレンダ時刻)やCLOCK_MONOTONIC があ
る。
カレンダ時刻は、変更できる。逆走させることも可能。
順方向のジャンプや逆走を避けて、カレンダ時刻を合わせるには、adjtime() を使う。
int adjtime(const struct timeval *delta, struct timeval *olddelta);adjtime() を使った時刻同期の方法。
struct itimerval {
    struct timeval it_interval; /* next value */
    struct timeval it_value;    /* current value */
};
int setitimer(int which, const struct itimerval *value, 
    struct itimerval *ovalue);
int  select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
     		struct timeval *timeout);
int  poll(struct pollfd *fds, nfds_t nfds, int timeout);
int  epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
int  kevent(int kq, const struct kevent *changelist, int nchanges,
       struct kevent *eventlist, int nevents, const struct timespec *timeout);
ネットワーク・プログラムでよく使う。複数の入力を監視する。指定された時
間、入力がなければ、システム・コールから復帰する。
なにもしない時間切れ。
unsigned int sleep(unsigned int seconds); int usleep(useconds_t usec); int nanosleep(const struct timespec *rqtp, struct timespec *rmtp);

図? タイマ関連のハードウェアの基本モデル
2つの機能がある。
その他の割込み
linux-6.1.2/include/asm-generic/param.h 8: # define HZ CONFIG_HZ /* Internal kernel timer frequency */ linux-6.1.2/include/generated/autoconf.h 1209: #define CONFIG_HZ 1000 linux-6.1.2/include/linux/jiffies.h 79: extern u64 __cacheline_aligned_in_smp jiffies_64; 80: extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
$ nm vmlinux|grep 'D jiffies' ![[←]](../icons/screen-return.gif) ffffffff828079c0 D jiffies
ffffffff828079c0 D jiffies_64
ffffffff82807a40 D jiffies_lock
ffffffff82807a00 D jiffies_seq
$
ffffffff828079c0 D jiffies
ffffffff828079c0 D jiffies_64
ffffffff82807a40 D jiffies_lock
ffffffff82807a00 D jiffies_seq
$ ![[]](../icons/screen-cursor.gif) 
linux-6.1.2/kernel/time/tick-common.c
  85:	static void tick_periodic(int cpu)
  86:	{
  87:	        if (tick_do_timer_cpu == cpu) {
...
  94:	                do_timer(1);
...
  97:	                update_wall_time();
  98:	        }
  99:	
 100:	        update_process_times(user_mode(get_irq_regs()));
...
 102:	}
linux-6.1.2/kernel/time/timer.c
  60:	__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
linux-6.1.2/kernel/time/timekeeping.c
2289:	void do_timer(unsigned long ticks)
2290:	{
2291:	        jiffies_64 += ticks;
...
2293:	}
xtime_nsec >> shift でナノ秒を表す。
linux-6.1.2/include/linux/timekeeper_internal.h
  92:	struct timekeeper {
  93:	        struct tk_read_base     tkr_mono;
...
  95:	        u64                     xtime_sec;
...
 139:	};
  34:	struct tk_read_base {
  35:	        struct clocksource      *clock;
...
  39:	        u32                     shift;
  40:	        u64                     xtime_nsec;
...
  43:	};
linux-6.1.2/include/linux/time64.h
  13:	struct timespec64 {
  14:	        time64_t        tv_sec;                 /* seconds */
  15:	        long            tv_nsec;                /* nanoseconds */
  16:	};
linux-6.1.2/kernel/time/timekeeping.c
 129:	static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
 130:	{
 131:	        struct timespec64 ts;
 132:	
 133:	        ts.tv_sec = tk->xtime_sec;
 134:	        ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 135:	        return ts;
 136:	}
linux-6.1.2/kernel/time/time.c
 140:	SYSCALL_DEFINE2(gettimeofday, struct __kernel_old_timeval __user *, tv,
 141:	                struct timezone __user *, tz)
 142:	{
 143:	        if (likely(tv != NULL)) {
 144:	                struct timespec64 ts;
 145:	
 146:	                ktime_get_real_ts64(&ts);
 147:	                if (put_user(ts.tv_sec, &tv->tv_sec) ||
 148:	                    put_user(ts.tv_nsec / 1000, &tv->tv_usec))
 149:	                        return -EFAULT;
 150:	        }
 151:	        if (unlikely(tz != NULL)) {
 152:	                if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
 153:	                        return -EFAULT;
 154:	        }
 155:	        return 0;
 156:	}
linux-6.1.2/kernel/time/timekeeping.c
 815:	void ktime_get_real_ts64(struct timespec64 *ts)
 816:	{
 817:	        struct timekeeper *tk = &tk_core.timekeeper;
 818:	        unsigned int seq;
 819:	        u64 nsecs;
...
 826:	                ts->tv_sec = tk->xtime_sec;
 827:	                nsecs = timekeeping_get_ns(&tk->tkr_mono);
...
 831:	        ts->tv_nsec = 0;
 832:	        timespec64_add_ns(ts, nsecs);
 833:	}
linux-6.1.2/include/linux/timer.h
  11:	struct timer_list {
...
  17:	        unsigned long           expires;
  18:	        void                    (*function)(struct timer_list *);
...
  24:	};
jiffies が増加して expires に達すれば、(*function)(tl) を呼ぶ。 引数 tl は、struct timer_list *。
主に次の関数で操作する。
(*function)() で独自のデータ(以下の例では struct s1 *)を得るには、次の ように from_timer() マクロか container_of() マクロを用いる。
struct s1 {
	...
        struct timer_list s_timer;
	...
        int s_x;
	...
};
void timer_list_handler(struct timer_list *tl) {
	struct s1 *p1 = from_timer(p1, tl, s_timer);
	f( p1->s_x );
}

図? timer_list から外側の構造体を求める
linux-6.1.2/include/linux/timer.h
 153:	#define from_timer(var, callback_timer, timer_fieldname) \
 154:	        container_of(callback_timer, typeof(*var), timer_fieldname)
linux-6.1.2/include/linux/container_of.h
  17:	#define container_of(ptr, type, member) ({                              \
  18:	        void *__mptr = (void *)(ptr);                                   \
...
  22:	        ((type *)(__mptr - offsetof(type, member))); })
linux-6.1.2/kernel/time/timer.c
1853:	struct process_timer {
1854:	        struct timer_list timer;
1855:	        struct task_struct *task;
1856:	};
1896:	signed long __sched schedule_timeout(signed long timeout)
1897:	{
1898:	        struct process_timer timer;
1899:	        unsigned long expire;
...
1930:	        expire = timeout + jiffies;
1931:	
1932:	        timer.task = current;
1933:	        timer_setup_on_stack(&timer.timer, process_timeout, 0);
1934:	        __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
1935:	        schedule();
1936:	        del_singleshot_timer_sync(&timer.timer);
1937:	
1938:	        /* Remove the timer from the object tracker */
1939:	        destroy_timer_on_stack(&timer.timer);
1940:	
1941:	        timeout = expire - jiffies;
1942:	
1943:	 out:
1944:	        return timeout < 0 ? 0 : timeout;
1945:	}
1858:	static void process_timeout(struct timer_list *t)
1859:	{
1860:	        struct process_timer *timeout = from_timer(timeout, t, timer);
1861:	
1862:	        wake_up_process(timeout->task);
1863:	}
linux-6.1.2/include/linux/hrtimer.h
  39:	enum hrtimer_mode {
  40:	        HRTIMER_MODE_ABS        = 0x00,
  41:	        HRTIMER_MODE_REL        = 0x01,
...
  60:	};
  65:	enum hrtimer_restart {
  66:	        HRTIMER_NORESTART,      /* Timer is not restarted */
  67:	        HRTIMER_RESTART,        /* Timer must be restarted */
  68:	};
 118:	struct hrtimer {
...
 121:	        enum hrtimer_restart            (*function)(struct hrtimer *);
...
 127:	};
主に次の関数で操作する。
    struct hrtimer my_timer;
    hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    my_timer.function = my_timer_handler;
    ...
    hrtimer_start(&my_timer,  ktime_set(0, t_nano), HRTIMER_MODE_REL);
    ...
enum hrtimer_restart my_timer_handler(struct hrtimer *timer)
{
        ...
        return HRTIMER_NORESTART;
}
例: Ethernet のドライバでモードを変更して 2 マイクロ秒だけ待つ。
様々な方法がある。
例1: 10 tick (インターバル・タイマによる割り込み)を待つ。
unsigned long timeout = jiffies + 10; // 10 ticks
while (time_before(jiffies,timeout))
    continue;
例2: 2秒待つ
unsigned long delay = jiffies + 2*HZ; // 2秒
while (time_before(jiffies,delay))
    continue;
unsigned long timeout = jiffies + 10; // 10 ticks
while (jiffies<timeout)
    continue;
引き算して 0 と比較すると、オーバフローの問題が解決できる。
unsigned long timeout = jiffies + 10; // 10 ticks
while (jiffies-timeout<0)
    continue;
次のマクロを使う方法もある。
linux-6.1.2/include/linux/jiffies.h 104: #define time_after(a,b) \ 105: (typecheck(unsigned long, a) && \ 106: typecheck(unsigned long, b) && \ 107: ((long)((b) - (a)) < 0)) 108: #define time_before(a,b) time_after(b,a) 109: 110: #define time_after_eq(a,b) \ 111: (typecheck(unsigned long, a) && \ 112: typecheck(unsigned long, b) && \ 113: ((long)((a) - (b)) >= 0)) 114: #define time_before_eq(a,b) time_after_eq(b,a)
unsigned long delay = jiffies + 2*HZ; // 2秒
while (time_before(jiffies,delay))
    cond_resched();
他に実行すべき重要なプロセスが存在する(条件)時には、スケジューラを呼ん
で、実行する。存在しなければ、空ループと同じ。ただし、スケジューラを呼
ぶ(sleepする可能性がある)ので、割り込みコンテキストからは使えない。
void ndelay(unsigned long nsecs) void udelay(unsigned long usecs) void mdelay(unsigned long msecs)udelay() は、ある回数のループで実装されている。回数は、CPUの速度等で決 まる。ndelay(), mdelay() は、udelay() を呼んでいる。
udelay() で1ミリ秒以上待ってはいけない。 ループのインデックスがオーバフローする可能性がある。
set_current_state( TASK_INTERRUPTIBLE ); // signal で起きる可能性がある schedule_timeout( s * HZ );実装には struct timer_list が使われている。
| 表示 | 説明 | 
| NI | Nice。優先度を表す値。 | 
$ /bin/ps l ![[←]](../icons/screen-return.gif) F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 poll_s S    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27031 21150  20   0 108132   980 -      R+   pts/3      0:00 /bin/ps l
$ /bin/nice /bin/ps l
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 poll_s S    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27031 21150  20   0 108132   980 -      R+   pts/3      0:00 /bin/ps l
$ /bin/nice /bin/ps l ![[←]](../icons/screen-return.gif) F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 poll_s S    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27034 21150  30  10 108136   984 -      RN+  pts/3      0:00 /bin/ps l
$ /bin/nice -19 /bin/ps l
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 poll_s S    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27034 21150  30  10 108136   984 -      RN+  pts/3      0:00 /bin/ps l
$ /bin/nice -19 /bin/ps l ![[←]](../icons/screen-return.gif) F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 -      R    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27035 21150  39  19 108132   984 -      RN+  pts/3      0:00 /bin/ps l
$
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0  1013 20638 20636  20   0 123572  2100 wait   Ss   pts/2      0:00 -bash
0  1013 21139 20638  20   0 155660  5900 -      R    pts/2      0:02 xterm -class UXTerm -title uxterm -u8
0  1013 21150 21139  20   0 123552  2144 wait   Ss   pts/3      0:00 bash
0  1013 21560 20638  20   0 267808 22928 poll_s S+   pts/2      0:09 emacs -nw
0  1013 21784 21150  20   0 103748   956 signal T    pts/3      0:00 lv kernel/time/timer.c
0  1013 27035 21150  39  19 108132   984 -      RN+  pts/3      0:00 /bin/ps l
$ ![[]](../icons/screen-cursor.gif) 
   1:	/*
   2:	        getpriority-pid.c -- 優先度の表示
   3:	        ~yas/syspro/proc/getpriority-pid.c
   4:	        Created on: 2009/12/14 12:15:11
   5:	*/
   6:	
   7:	#include <stdio.h>              /* stderr, fprintf() */
   8:	#include <sys/time.h>           /* getpriority() */
   9:	#include <sys/resource.h>       /* getpriority() */
  10:	#include <stdlib.h>             /* strtol() */
  11:	#include <limits.h>             /* strtol() */
  12:	
  13:	main( int argc, char *argv[] )
  14:	{
  15:	    int which, who, prio;
  16:	    pid_t pid;
  17:	        if( argc != 2 )
  18:	        {
  19:	            fprintf(stderr,"Usage: %% %s pid\n",argv[0] );
  20:	            exit( 1 );
  21:	        }
  22:	        pid = strtol( argv[1], NULL, 10 );
  23:	        prio = getpriority( PRIO_PROCESS, pid );
  24:	        printf("pid==%d, priority==%d\n", pid, prio);
  25:	}
$ ./getpriority-pid  ![[←]](../icons/screen-return.gif) Usage: % ./getpriority-pid pid
$ echo $$
Usage: % ./getpriority-pid pid
$ echo $$ ![[←]](../icons/screen-return.gif) 21150
$ ./getpriority-pid
21150
$ ./getpriority-pid  ![[←]](../icons/screen-return.gif) Usage: % ./getpriority-pid pid
$ ./getpriority-pid $$
Usage: % ./getpriority-pid pid
$ ./getpriority-pid $$ ![[←]](../icons/screen-return.gif) pid==21150, priority==0
$ ./getpriority-pid 0
pid==21150, priority==0
$ ./getpriority-pid 0 ![[←]](../icons/screen-return.gif) pid==0, priority==0
$ /bin/nice -10 ./getpriority-pid 0
pid==0, priority==0
$ /bin/nice -10 ./getpriority-pid 0 ![[←]](../icons/screen-return.gif) pid==0, priority==10
$ /bin/nice -20 ./getpriority-pid 0
pid==0, priority==10
$ /bin/nice -20 ./getpriority-pid 0 ![[←]](../icons/screen-return.gif) pid==0, priority==19
$
pid==0, priority==19
$ ![[]](../icons/screen-cursor.gif) 
linux-6.1.2/include/linux/sched.h
 737:	struct task_struct {
...
 745:	        unsigned int                    __state;
...
 783:	        int                             prio;
 784:	        int                             static_prio;
 785:	        int                             normal_prio;
 786:	        unsigned int                    rt_priority;
 787:	
 788:	        struct sched_entity             se;
 789:	        struct sched_rt_entity          rt;
 790:	        struct sched_dl_entity          dl;
 791:	        const struct sched_class        *sched_class;
...
 827:	        unsigned int                    policy;
...
1546:	};
 547:	struct sched_entity {
...
 549:	        struct load_weight              load;
 550:	        struct rb_node                  run_node;
...
 552:	        unsigned int                    on_rq;
 553:	
 554:	        u64                             exec_start;
 555:	        u64                             sum_exec_runtime;
 556:	        u64                             vruntime;
...
 581:	};
 407:	struct load_weight {
 408:	        unsigned long                   weight;
 409:	        u32                             inv_weight;
 410:	};
struct task_struct の中に、prio 等のフィールドやstruct sched_entity が
ある。
struct sched_entity で重要なフィールド。詳しくは後述。
linux-6.1.2/include/uapi/linux/sched.h 114: #define SCHED_NORMAL 0 115: #define SCHED_FIFO 1 116: #define SCHED_RR 2 117: #define SCHED_BATCH 3 118: /* SCHED_ISO: reserved but not implemented yet */ 119: #define SCHED_IDLE 5 120: #define SCHED_DEADLINE 6
linux-6.1.2/kernel/sys.c
 281:	SYSCALL_DEFINE2(getpriority, int, which, int, who)
 282:	{
 283:	        struct task_struct *g, *p;
 284:	        struct user_struct *user;
 285:	        const struct cred *cred = current_cred();
 286:	        long niceval, retval = -ESRCH;
 287:	        struct pid *pgrp;
 288:	        kuid_t uid;
...
 294:	        switch (which) {
 295:	        case PRIO_PROCESS:
 296:	                if (who)
 297:	                        p = find_task_by_vpid(who);
 298:	                else
 299:	                        p = current;
 300:	                if (p) {
 301:	                        niceval = nice_to_rlimit(task_nice(p));
 302:	                        if (niceval > retval)
 303:	                                retval = niceval;
 304:	                }
 305:	                break;
 306:	        case PRIO_PGRP:
...
 319:	        case PRIO_USER:
...
 339:	        }
...
 343:	        return retval;
 344:	}
linux-6.1.2/include/linux/sched/prio.h
   5:	#define MAX_NICE        19
   6:	#define MIN_NICE        -20
   7:	#define NICE_WIDTH      (MAX_NICE - MIN_NICE + 1)
...
  16:	#define MAX_RT_PRIO             100
  17:	
  18:	#define MAX_PRIO                (MAX_RT_PRIO + NICE_WIDTH)
  19:	#define DEFAULT_PRIO            (MAX_RT_PRIO + NICE_WIDTH / 2)
...
  26:	#define NICE_TO_PRIO(nice)      ((nice) + DEFAULT_PRIO)
  27:	#define PRIO_TO_NICE(prio)      ((prio) - DEFAULT_PRIO)
1889:	/**
1890:	 * task_nice - return the nice value of a given task.
1891:	 * @p: the task in question.
1892:	 *
1893:	 * Return: The nice value [ -20 ... 0 ... 19 ].
1894:	 */
1895:	static inline int task_nice(const struct task_struct *p)
1896:	{
1897:	        return PRIO_TO_NICE((p)->static_prio);
1898:	}
linux-6.1.2/include/linux/sched/prio.h
  32:	static inline long nice_to_rlimit(long nice)
  33:	{
  34:	        return (MAX_NICE - nice + 1);
  35:	}
glibc-2.12/sysdeps/unix/sysv/linux/getpriority.c
  28:	#define PZERO 20
...
  35:	int
  36:	getpriority (enum __priority_which which, id_t who)
  37:	{
  38:	  int res;
  39:	
  40:	  res = INLINE_SYSCALL (getpriority, 2, (int) which, who);
  41:	  if (res >= 0)
  42:	    res = PZERO - res;
  43:	  return res;
  44:	}
linux-6.1.2/kernel/sched/core.c
11186:	/*
11187:	 * Nice levels are multiplicative, with a gentle 10% change for every
11188:	 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
11189:	 * nice 1, it will get ~10% less CPU time than another CPU-bound task
11190:	 * that remained on nice 0.
11191:	 *
11192:	 * The "10% effect" is relative and cumulative: from _any_ nice level,
11193:	 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
11194:	 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
11195:	 * If a task goes up by ~10% and another task goes down by ~10% then
11196:	 * the relative distance between them is ~25%.)
11197:	 */
11198:	const int sched_prio_to_weight[40] = {
11199:	 /* -20 */     88761,     71755,     56483,     46273,     36291,
11200:	 /* -15 */     29154,     23254,     18705,     14949,     11916,
11201:	 /* -10 */      9548,      7620,      6100,      4904,      3906,
11202:	 /*  -5 */      3121,      2501,      1991,      1586,      1277,
11203:	 /*   0 */      1024,       820,       655,       526,       423,
11204:	 /*   5 */       335,       272,       215,       172,       137,
11205:	 /*  10 */       110,        87,        70,        56,        45,
11206:	 /*  15 */        36,        29,        23,        18,        15,
11207:	};
1260:	static void set_load_weight(struct task_struct *p, bool update_load)
1261:	{
1262:	        int prio = p->static_prio - MAX_RT_PRIO;
1263:	        struct load_weight *load = &p->se.load;
1264:	
...
1281:	                load->weight = scale_load(sched_prio_to_weight[prio]);
1282:	                load->inv_weight = sched_prio_to_wmult[prio];
...
1284:	}
linux-6.1.2/kernel/sched/sched.h
 400:	# define SCHED_FIXEDPOINT_SHIFT         10
 401:	# define SCHED_FIXEDPOINT_SCALE         (1L << SCHED_FIXEDPOINT_SHIFT)
linux-6.1.2/kernel/sched/sched.h
 144:	# define scale_load(w)          ((w) << SCHED_FIXEDPOINT_SHIFT)
| 名前 | 説明 | 
|---|---|
| enqueue_task | プロセスが実行可能(runnable)になった | 
| dequeue_task | プロセスが実行可能ではなくなった | 
| yield_task | CPUを譲る。dequeueしてenqueue | 
| check_preempt_curr | 実行可能になった時にCPUを横取りすべきかをチェック | 
| pick_next_task | 次に実行すべきプロセスを選ぶ | 
| set_curr_task | スケジューリング・クラスが変更された | 
| task_tick | タイマ割込み(tick)の時に呼ばれる | 
| task_new | 新しいプロセスが生成された | 
linux-6.1.2/kernel/sched/core.c
2049:	static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
2050:	{
...
2060:	        p->sched_class->enqueue_task(rq, p, flags);
...
2064:	}
2066:	static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
2067:	{
...
2080:	        p->sched_class->dequeue_task(rq, p, flags);
2081:	}
linux-6.1.2/kernel/sched/core.c
7433:	static int __sched_setscheduler(struct task_struct *p,
7434:	                                const struct sched_attr *attr,
7435:	                                bool user, bool pi)
7436:	{
...
7613:	                __setscheduler_params(p, attr);
7614:	                __setscheduler_prio(p, newprio);
...
7654:	}
7329:	static void __setscheduler_params(struct task_struct *p,
7330:	                const struct sched_attr *attr)
7331:	{
7332:	        int policy = attr->sched_policy;
7333:	
7334:	        if (policy == SETPARAM_POLICY)
7335:	                policy = p->policy;
7336:	
7337:	        p->policy = policy;
7338:	
7339:	        if (dl_policy(policy))
7340:	                __setparam_dl(p, attr);
7341:	        else if (fair_policy(policy))
7342:	                p->static_prio = NICE_TO_PRIO(attr->sched_nice);
...
7349:	        p->rt_priority = attr->sched_priority;
7350:	        p->normal_prio = normal_prio(p);
7351:	        set_load_weight(p, true);
7352:	}
6849:	static void __setscheduler_prio(struct task_struct *p, int prio)
6850:	{
6851:	        if (dl_prio(prio))
6852:	                p->sched_class = &dl_sched_class;
6853:	        else if (rt_prio(prio))
6854:	                p->sched_class = &rt_sched_class;
6855:	        else
6856:	                p->sched_class = &fair_sched_class;
6857:	
6858:	        p->prio = prio;
6859:	}
p->prio  の値に応じて
&dl_sched_class  か
&rt_sched_class  か
&fair_sched_class  のいずれかを指すようにする。
CPUバウンドのプロセスが複数存在した時、ある期間を定めて、この期間の間に、 (優先度を考慮して)公平になるようにCPU資源を割り当てる。この期間の間に、 1度はCPUを割り当てられるようにがんばる。この期間は、 kernel.sched_latency_ns で設定されている。以下の例では、15ミリ秒。
$ sysctl kernel.sched_latency_ns ![[←]](../icons/screen-return.gif) kernel.sched_latency_ns = 15000000
$
kernel.sched_latency_ns = 15000000
$ ![[]](../icons/screen-cursor.gif) 
たとえば、もし優先度が同じプロセスAとプロセスBが存在した時には、15ミリ
秒の間にプロセスAに7.5ミリ秒、プロセスBに7.5ミリ秒のCPU資源を割り当てる
ようにがんばる。
Linux CFS は、次の方法でスケジューリングを行なう。

図? runqueueの構造
linux-6.1.2/kernel/sched/core.c
 114:	DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
linux-6.1.2/kernel/sched/sched.h
 954:	struct rq {
...
 990:	        struct cfs_rq           cfs;
 991:	        struct rt_rq            rt;
 992:	        struct dl_rq            dl;
...
1153:	};
 550:	struct cfs_rq {
...
 568:	        struct rb_root_cached   tasks_timeline;
...
 574:	        struct sched_entity     *curr;
...
 650:	};

図? runqueueの構造(red-black tree)
linux-6.1.2/kernel/sched/fair.c
 628:	static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 629:	{
 630:	        rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
 631:	}
 633:	static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 634:	{
 635:	        rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
 636:	}
 620:	static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
 621:	{
 622:	        return entity_before(__node_2_se(a), __node_2_se(b));
 623:	}
 583:	static inline bool entity_before(struct sched_entity *a,
 584:	                                struct sched_entity *b)
 585:	{
 586:	        return (s64)(a->vruntime - b->vruntime) < 0;
 587:	}
 589:	#define __node_2_se(node) \
 590:	        rb_entry((node), struct sched_entity, run_node)
linux-6.1.2/include/linux/rbtree.h
  28:	#define rb_entry(ptr, type, member) container_of(ptr, type, member)
 164:	static __always_inline struct rb_node *
 165:	rb_add_cached(struct rb_node *node, struct rb_root_cached *tree,
 166:	              bool (*less)(struct rb_node *, const struct rb_node *))
 167:	{
 168:	        struct rb_node **link = &tree->rb_root.rb_node;
 169:	        struct rb_node *parent = NULL;
 170:	        bool leftmost = true;
 171:	
 172:	        while (*link) {
 173:	                parent = *link;
 174:	                if (less(node, parent)) {
 175:	                        link = &parent->rb_left;
 176:	                } else {
 177:	                        link = &parent->rb_right;
 178:	                        leftmost = false;
 179:	                }
 180:	        }
 181:	
 182:	        rb_link_node(node, parent, link);
 183:	        rb_insert_color_cached(node, tree, leftmost);
 184:	
 185:	        return leftmost ? node : NULL;
 186:	}
&parent->rb_left), 大きければ右(&parent->rb_right) に進む。
cfs_rq->tasks_timeline->rb_leftmost にも保存。
linux-6.1.2/kernel/sched/core.c
5463:	void scheduler_tick(void)
5464:	{
5465:	        int cpu = smp_processor_id();
5466:	        struct rq *rq = cpu_rq(cpu);
5467:	        struct task_struct *curr = rq->curr;
...
5480:	        curr->sched_class->task_tick(rq, curr, 0);
...
5497:	}
linux-6.1.2/kernel/sched/fair.c
11740:	static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
11741:	{
11742:	        struct cfs_rq *cfs_rq;
11743:	        struct sched_entity *se = &curr->se;
11744:	
11745:	        for_each_sched_entity(se) {
11746:	                cfs_rq = cfs_rq_of(se);
11747:	                entity_tick(cfs_rq, se, queued);
11748:	        }
...
11757:	}
5036:	static void
5037:	entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
5038:	{
...
5042:	        update_curr(cfs_rq);
...
5069:	}
...
 882:	static void update_curr(struct cfs_rq *cfs_rq)
 883:	{
 884:	        struct sched_entity *curr = cfs_rq->curr;
 885:	        u64 now = rq_clock_task(rq_of(cfs_rq));
 886:	        u64 delta_exec;
 887:	
...
 891:	        delta_exec = now - curr->exec_start;
...
 895:	        curr->exec_start = now;
...
 905:	        curr->sum_exec_runtime += delta_exec;
...
 908:	        curr->vruntime += calc_delta_fair(delta_exec, curr);
 909:	        update_min_vruntime(cfs_rq);
...
 920:	}
 694:	static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 695:	{
 696:	        if (unlikely(se->load.weight != NICE_0_LOAD))
 697:	                delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 698:	
 699:	        return delta;
 700:	}
$ cat /proc/sched_debug ![[←]](../icons/screen-return.gif) Sched Debug Version: v0.09, 2.6.32-431.3.1.el6.x86_64 #1
now at 7955627655.961573 msecs
  .jiffies                                 : 12250294951
...
cpu#0, 2100.000 MHz
  .nr_running                    : 1
...
  .curr->pid                     : 30990
...
cfs_rq[0]:/
  .exec_clock                    : 40812852.059736
...
rt_rq[0]:/
  .rt_nr_running                 : 0
...
  .nr_running                    : 1
...
runnable tasks:
            task   PID         tree-key  switches  prio     exec-runtime         sum-exec        sum-sleep
----------------------------------------------------------------------------------------------------------
R            cat 30990  32644150.029656         2   120  32644150.029656         1.072543         0.366310 /
...
cpu#1, 2100.000 MHz
...
cpu#2, 2100.000 MHz
...
cpu#3, 2100.000 MHz
...
$ cat /proc/self/sched
Sched Debug Version: v0.09, 2.6.32-431.3.1.el6.x86_64 #1
now at 7955627655.961573 msecs
  .jiffies                                 : 12250294951
...
cpu#0, 2100.000 MHz
  .nr_running                    : 1
...
  .curr->pid                     : 30990
...
cfs_rq[0]:/
  .exec_clock                    : 40812852.059736
...
rt_rq[0]:/
  .rt_nr_running                 : 0
...
  .nr_running                    : 1
...
runnable tasks:
            task   PID         tree-key  switches  prio     exec-runtime         sum-exec        sum-sleep
----------------------------------------------------------------------------------------------------------
R            cat 30990  32644150.029656         2   120  32644150.029656         1.072543         0.366310 /
...
cpu#1, 2100.000 MHz
...
cpu#2, 2100.000 MHz
...
cpu#3, 2100.000 MHz
...
$ cat /proc/self/sched ![[←]](../icons/screen-return.gif) cat (31354, #threads: 1)
---------------------------------------------------------
se.exec_start                      :    7962193228.073935
se.vruntime                        :      51856286.476132
se.sum_exec_runtime                :             1.211193
...
se.load.weight                     :                 1024
policy                             :                    0
prio                               :                  120
clock-delta                        :                  127
$
cat (31354, #threads: 1)
---------------------------------------------------------
se.exec_start                      :    7962193228.073935
se.vruntime                        :      51856286.476132
se.sum_exec_runtime                :             1.211193
...
se.load.weight                     :                 1024
policy                             :                    0
prio                               :                  120
clock-delta                        :                  127
$ ![[]](../icons/screen-cursor.gif) 
void h(int a,int b, int c) {
   ....
}
これを実現するために、どのようなコードを書けばよいか。以下の空欄を埋め
なさい。
2023/01/27 問題を少し修正します。回答は同じで良いです。空欄の順番が少しずれています。
struct timer_list my_timer;
int my_arg_a,my_arg_b,my_arg_c;
void f(unsigned long data) {
    // init_timer( /*空欄(a)*/ );
    timer_setup( /*空欄(a)*/, /*空欄(c)*/, /*省略*/ );
    my_timer.expires  = /*空欄(b)*/;
    // my_timer.data     = 0; // 削除
    // my_timer.function = /*空欄(c)*/;
    /*空欄(d)*/;
}
void my_timer_func(/*省略*/) {
     h( my_arg_a,my_arg_b,my_arg_c );
}
またポリシーがSCHED_NORMAL の時、 struct task_struct のフィールドで ある期間に消費したCPU時間(ナノ秒単位)を保持するフィールドを答えなさい。

図? 4つの要素を持つリスト構造
注意: 正しい二分探索木は、複数存在する。