January 28, 2026
College of Information Science: Operating Systems II
Institute of Systems and Information Engineering, University of Tsukuba
Yasushi Shinjo
<yas@cs.tsukuba.ac.jp>
This page is available at the following URL:
https://www.coins.tsukuba.ac.jp/~yas/coins/os2-2025/2026-01-28
Alternatively, you can reach it by following links from these pages:
https://www.coins.tsukuba.ac.jp/~yas/
https://www.cs.tsukuba.ac.jp/~yas/
struct timeval {
time_t tv_sec; /* seconds. long int */
suseconds_t tv_usec; /* microseconds. long int */
};
int gettimeofday(struct timeval *tp, struct timezone *tzp);
int settimeofday(const struct timeval *tp, const struct timezone *tzp);
Usage
/*
  gettimeofday-print.c -- get calendar time and print
  Created on: 2014/01/22 20:40:34
*/

#include <sys/time.h> /* gettimeofday() */
#include <time.h>     /* ctime() */
#include <stdio.h>

int main(void)
{
    struct timeval tv;
    time_t sec;
    gettimeofday( &tv, NULL );
    sec = tv.tv_sec;
    printf("%s", ctime(&sec) );
    return 0;
}
$ make gettimeofday-print
cc gettimeofday-print.c -o gettimeofday-print
$ ./gettimeofday-print
Mon Jan 27 11:50:00 2026
$ date
Mon Jan 27 11:50:01 JST 2026
$
In POSIX 1003.1-2003, struct timespec expresses time in nanoseconds.
struct timespec {
time_t tv_sec; /* Seconds. */
long int tv_nsec; /* Nanoseconds. */
};
int clock_settime(clockid_t clock_id, const struct timespec *tp);
int clock_gettime(clockid_t clock_id, struct timespec *tp);
int clock_getres(clockid_t clock_id, struct timespec *res);
The clock_id argument can be, among others, CLOCK_REALTIME (calendar time) or CLOCK_MONOTONIC.
The calendar time can be changed; it can even be made to run backwards.
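Because the calendar clock can jump or run backwards, elapsed time should be measured with CLOCK_MONOTONIC. A minimal sketch (this example is not from the lecture page itself):

#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0); /* unaffected by settimeofday() */
    /* ... work being measured ... */
    clock_gettime(CLOCK_MONOTONIC, &t1);
    printf("%.9f seconds\n",
           (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9);
    return 0;
}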
To set the calendar time while avoiding forward jumps and backward motion, use adjtime().
int adjtime(const struct timeval *delta, struct timeval *olddelta);
How to synchronize time using adjtime().
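For example, a sketch that slews the clock forward by 0.5 seconds instead of stepping it (requires root; the 0.5-second value is arbitrary):

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
    struct timeval delta = { .tv_sec = 0, .tv_usec = 500000 }; /* +0.5 s */
    struct timeval olddelta;
    /* The kernel runs the clock slightly fast until the 0.5 s is absorbed;
       the calendar time never jumps forward or runs backwards. */
    if (adjtime(&delta, &olddelta) == -1) {
        perror("adjtime");
        return 1;
    }
    printf("previously pending adjustment: %ld.%06ld s\n",
           (long)olddelta.tv_sec, (long)olddelta.tv_usec);
    return 0;
}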
struct itimerval {
struct timeval it_interval; /* next value */
struct timeval it_value; /* current value */
};
int setitimer(int which, const struct itimerval *value,
struct itimerval *ovalue);
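A minimal sketch of setitimer(): deliver SIGALRM once per second (the handler name and output are assumptions of this sketch):

#include <signal.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

static void on_alarm(int sig) { (void)sig; write(1, "tick\n", 5); }

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = on_alarm;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGALRM, &sa, NULL);
    struct itimerval itv = {
        .it_interval = { .tv_sec = 1, .tv_usec = 0 }, /* period */
        .it_value    = { .tv_sec = 1, .tv_usec = 0 }, /* first expiry */
    };
    setitimer(ITIMER_REAL, &itv, NULL); /* real-time timer -> SIGALRM */
    for (;;)
        pause(); /* sleep until a signal arrives */
}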
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
struct timeval *timeout);
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
int kevent(int kq, const struct kevent *changelist, int nchanges,
struct kevent *eventlist, int nevents, const struct timespec *timeout);
These calls are frequently used in network programs. They monitor multiple inputs and return from the system call if no input arrives within the specified time.
Reference: System Programming / Serving multiple clients concurrently with select()
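A sketch of the timeout behavior with select(): wait up to 5 seconds for input on standard input (the 5-second value is arbitrary):

#include <stdio.h>
#include <sys/select.h>

int main(void)
{
    fd_set readfds;
    FD_ZERO(&readfds);
    FD_SET(0, &readfds); /* watch fd 0 (stdin) */
    struct timeval timeout = { .tv_sec = 5, .tv_usec = 0 };
    int n = select(1, &readfds, NULL, NULL, &timeout);
    if (n == 0)
        printf("timed out: no input within 5 seconds\n");
    else if (n > 0 && FD_ISSET(0, &readfds))
        printf("stdin became readable\n");
    return 0;
}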
Timing out without doing anything (sleeping):
unsigned int sleep(unsigned int seconds);
int usleep(useconds_t usec);
int nanosleep(const struct timespec *rqtp, struct timespec *rmtp);
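nanosleep() returns the unslept remainder through its second argument when a signal interrupts it, so sleeping the full interval can be written as follows (a sketch; sleep_fully() is a hypothetical helper name):

#include <errno.h>
#include <time.h>

/* Sleep for the whole interval even if signals interrupt the sleep. */
void sleep_fully(time_t sec, long nsec)
{
    struct timespec req = { .tv_sec = sec, .tv_nsec = nsec }, rem;
    while (nanosleep(&req, &rem) == -1 && errno == EINTR)
        req = rem; /* resume with the remaining time */
}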

Figure: Basic model of timer-related hardware
The hardware provides two functions: keeping the current time (the clock source) and raising an interrupt at a specified time.
Other interrupts
$ cat /sys/devices/system/clocksource/clocksource0/available_clocksource
tsc acpi_pm
$ cat /sys/devices/system/clocksource/clocksource0/current_clocksource
tsc
$
$ grep CONFIG_HZ .config
# CONFIG_HZ_PERIODIC is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
$
Usage examples
linux-6.18.2/include/asm-generic/param.h
8: # define HZ CONFIG_HZ /* Internal kernel timer frequency */
linux-6.18.2/include/linux/jiffies.h
85: extern u64 __cacheline_aligned_in_smp jiffies_64;
86: extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
$ ls -l vmlinux
-rwxr-xr-x 1 yas prof 52309776 Jan 2 12:54 vmlinux
$ nm vmlinux | grep 'D jiffies'
ffffffff82c099c0 D jiffies
ffffffff82c099c0 D jiffies_64
ffffffff82c09a80 D jiffies_lock
ffffffff82c09a40 D jiffies_seq
$
linux-6.18.2/kernel/time/tick-common.c
86: static void tick_periodic(int cpu)
87: {
88: if (READ_ONCE(tick_do_timer_cpu) == cpu) {
...
95: do_timer(1);
...
98: update_wall_time();
99: }
100:
101: update_process_times(user_mode(get_irq_regs()));
...
103: }
linux-6.18.2/kernel/time/timer.c
60: __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
linux-6.18.2/kernel/time/timekeeping.c
2528: void do_timer(unsigned long ticks)
2529: {
2530: jiffies_64 += ticks;
...
2532: }
xtime_nsec >> shift gives the value in nanoseconds.
linux-6.18.2/include/linux/timekeeper_internal.h
140: struct timekeeper {
...
142: struct tk_read_base tkr_mono;
...
145: u64 xtime_sec;
...
183: };
50: struct tk_read_base {
51: struct clocksource *clock;
...
55: u32 shift;
56: u64 xtime_nsec;
...
59: };
linux-6.18.2/include/linux/time64.h
13: struct timespec64 {
14: time64_t tv_sec; /* seconds */
15: long tv_nsec; /* nanoseconds */
16: };
linux-6.18.2/kernel/time/timekeeping.c
202: static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
203: {
204: struct timespec64 ts;
205:
206: ts.tv_sec = tk->xtime_sec;
207: ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
208: return ts;
209: }
linux-6.18.2/kernel/time/time.c
140: SYSCALL_DEFINE2(gettimeofday, struct __kernel_old_timeval __user *, tv,
141: struct timezone __user *, tz)
142: {
143: if (likely(tv != NULL)) {
144: struct timespec64 ts;
145:
146: ktime_get_real_ts64(&ts);
147: if (put_user(ts.tv_sec, &tv->tv_sec) ||
148: put_user(ts.tv_nsec / 1000, &tv->tv_usec))
149: return -EFAULT;
150: }
151: if (unlikely(tz != NULL)) {
152: if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
153: return -EFAULT;
154: }
155: return 0;
156: }
linux-6.18.2/kernel/time/timekeeping.c
793: void ktime_get_real_ts64(struct timespec64 *ts)
794: {
795: struct timekeeper *tk = &tk_core.timekeeper;
796: unsigned int seq;
797: u64 nsecs;
...
804: ts->tv_sec = tk->xtime_sec;
805: nsecs = timekeeping_get_ns(&tk->tkr_mono);
...
809: ts->tv_nsec = 0;
810: timespec64_add_ns(ts, nsecs);
811: }
linux-6.18.2/include/linux/timer_types.h
8: struct timer_list {
...
14: unsigned long expires;
15: void (*function)(struct timer_list *);
...
21: };
When jiffies increases and reaches expires, (*function)(tl) is called. The argument tl is a struct timer_list *.
A timer is manipulated mainly with the following functions: timer_setup(), add_timer(), mod_timer(), and timer_delete() / timer_delete_sync().
To recover your own data (struct s1 * in the example below) inside (*function)(), use the from_timer() macro or the container_of() macro, as follows:
struct s1 {
...
struct timer_list s_timer;
...
int s_x;
...
};
void timer_list_handler(struct timer_list *tl) {
struct s1 *p1 = from_timer(p1, tl, s_timer);
f( p1->s_x );
}
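A minimal sketch of arming the timer above (the surrounding names, such as start_example() and the one-second delay, are assumptions of this sketch, not part of the original example):

static struct s1 obj;

static void start_example(void)
{
    obj.s_x = 42;
    timer_setup(&obj.s_timer, timer_list_handler, 0); /* bind the handler */
    obj.s_timer.expires = jiffies + HZ;               /* about 1 second from now */
    add_timer(&obj.s_timer);                          /* arm the timer */
    /* Re-arm with mod_timer(&obj.s_timer, jiffies + 2*HZ);
       cancel with timer_delete_sync(&obj.s_timer). */
}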

Figure: Obtaining the enclosing structure from a timer_list
linux-6.18.2/include/linux/timer.h
132: #define from_timer(var, callback_timer, timer_fieldname) \
133: container_of(callback_timer, typeof(*var), timer_fieldname)
linux-6.18.2/include/linux/container_of.h
19: #define container_of(ptr, type, member) ({ \
20: void *__mptr = (void *)(ptr); \
...
24: ((type *)(__mptr - offsetof(type, member))); })
linux-6.18.2/include/linux/stddef.h
16: #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
linux-6.18.2/tools/include/linux/kernel.h
26: #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
linux-6.18.2/kernel/time/sleep_timeout.c
18: struct process_timer {
19: struct timer_list timer;
20: struct task_struct *task;
21: };
61: signed long __sched schedule_timeout(signed long timeout)
62: {
63: struct process_timer timer;
64: unsigned long expire;
...
93: expire = timeout + jiffies;
94:
95: timer.task = current;
96: timer_setup_on_stack(&timer.timer, process_timeout, 0);
97: timer.timer.expires = expire;
98: add_timer(&timer.timer);
99: schedule();
100: timer_delete_sync(&timer.timer);
101:
102: /* Remove the timer from the object tracker */
103: timer_destroy_on_stack(&timer.timer);
104:
105: timeout = expire - jiffies;
106:
107: out:
108: return timeout < 0 ? 0 : timeout;
109: }
110: EXPORT_SYMBOL(schedule_timeout);
23: static void process_timeout(struct timer_list *t)
24: {
25: struct process_timer *timeout = timer_container_of(timeout, t, timer);
26:
27: wake_up_process(timeout->task);
28: }
linux-6.18.2/include/linux/hrtimer.h
35: enum hrtimer_mode {
36: HRTIMER_MODE_ABS = 0x00,
37: HRTIMER_MODE_REL = 0x01,
...
56: };
linux-6.12.7/include/linux/hrtimer_types.h
13: enum hrtimer_restart {
14: HRTIMER_NORESTART, /* Timer is not restarted */
15: HRTIMER_RESTART, /* Timer must be restarted */
16: };
39: struct hrtimer {
...
42: enum hrtimer_restart (*function)(struct hrtimer *);
...
48: };
An hrtimer is manipulated mainly with the following functions:
struct hrtimer my_timer;
hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
my_timer.function = my_timer_handler;
...
hrtimer_start(&my_timer, ktime_set(0, t_nano), HRTIMER_MODE_REL);
...
enum hrtimer_restart my_timer_handler(struct hrtimer *timer)
{
...
return HRTIMER_NORESTART;
}
Example: an Ethernet driver changes a mode and then waits just 2 microseconds.
There are various ways to wait.
Example 1: wait for 10 ticks (interval-timer interrupts).
unsigned long timeout = jiffies + 10; // 10 ticks
while (time_before(jiffies,timeout))
continue;
Example 2: wait 2 seconds
unsigned long delay = jiffies + 2*HZ; // 2 seconds
while (time_before(jiffies,delay))
continue;
A naive direct comparison like the following breaks when jiffies overflows (wraps around):
unsigned long timeout = jiffies + 10; // 10 ticks
while (jiffies<timeout)
continue;
Subtracting the two values and comparing the difference against 0 as a signed number avoids the overflow problem:
unsigned long timeout = jiffies + 10; // 10 ticks
while ((long)(jiffies - timeout) < 0)
continue;
There is also the method of using the following macros:
linux-6.18.2/include/linux/jiffies.h
127: #define time_after(a,b) \
128: (typecheck(unsigned long, a) && \
129: typecheck(unsigned long, b) && \
130: ((long)((b) - (a)) < 0))
...
138: #define time_before(a,b) time_after(b,a)
...
147: #define time_after_eq(a,b) \
148: (typecheck(unsigned long, a) && \
149: typecheck(unsigned long, b) && \
150: ((long)((a) - (b)) >= 0))
...
158: #define time_before_eq(a,b) time_after_eq(b,a)
unsigned long delay = jiffies + 2*HZ; // 2 seconds
while (time_before(jiffies,delay))
cond_resched();
When there is another important process that should run (the condition), this invokes the scheduler and runs that process; when there is none, it is the same as an empty loop. However, because it calls the scheduler (and may sleep), it cannot be used in interrupt context.
void ndelay(unsigned long nsecs)
void udelay(unsigned long usecs)
void mdelay(unsigned long msecs)
udelay() is implemented as a loop executed a fixed number of times; the iteration count is determined by the CPU speed and similar factors. ndelay() and mdelay() call udelay().
Do not wait 1 millisecond or longer with udelay(): the loop index may overflow.
set_current_state( TASK_INTERRUPTIBLE ); // may be woken up by a signal
schedule_timeout( s * HZ );
The implementation uses struct timer_list.
| Field | Description |
|---|---|
| NI | Nice. A value representing the priority. |
$ /bin/ps l
F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND
0 1013 20638 20636 20 0 123572 2100 wait Ss pts/2 0:00 -bash
0 1013 21139 20638 20 0 155660 5900 poll_s S pts/2 0:02 xterm -class UXTerm -title uxterm -u8
0 1013 21150 21139 20 0 123552 2144 wait Ss pts/3 0:00 bash
0 1013 21560 20638 20 0 267808 22928 poll_s S+ pts/2 0:09 emacs -nw
0 1013 21784 21150 20 0 103748 956 signal T pts/3 0:00 lv kernel/time/timer.c
0 1013 27031 21150 20 0 108132 980 - R+ pts/3 0:00 /bin/ps l
$ /bin/nice /bin/ps l
F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND
0 1013 20638 20636 20 0 123572 2100 wait Ss pts/2 0:00 -bash
0 1013 21139 20638 20 0 155660 5900 poll_s S pts/2 0:02 xterm -class UXTerm -title uxterm -u8
0 1013 21150 21139 20 0 123552 2144 wait Ss pts/3 0:00 bash
0 1013 21560 20638 20 0 267808 22928 poll_s S+ pts/2 0:09 emacs -nw
0 1013 21784 21150 20 0 103748 956 signal T pts/3 0:00 lv kernel/time/timer.c
0 1013 27034 21150 30 10 108136 984 - RN+ pts/3 0:00 /bin/ps l
$ /bin/nice -19 /bin/ps l
F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND
0 1013 20638 20636 20 0 123572 2100 wait Ss pts/2 0:00 -bash
0 1013 21139 20638 20 0 155660 5900 - R pts/2 0:02 xterm -class UXTerm -title uxterm -u8
0 1013 21150 21139 20 0 123552 2144 wait Ss pts/3 0:00 bash
0 1013 21560 20638 20 0 267808 22928 poll_s S+ pts/2 0:09 emacs -nw
0 1013 21784 21150 20 0 103748 956 signal T pts/3 0:00 lv kernel/time/timer.c
0 1013 27035 21150 39 19 108132 984 - RN+ pts/3 0:00 /bin/ps l
$
/*
  getpriority-pid.c -- print the priority of a process
  ~yas/syspro/proc/getpriority-pid.c
  Created on: 2009/12/14 12:15:11
*/

#include <stdio.h>        /* stderr, fprintf() */
#include <sys/time.h>     /* getpriority() */
#include <sys/resource.h> /* getpriority() */
#include <stdlib.h>       /* strtol() */
#include <limits.h>       /* strtol() */

int main( int argc, char *argv[] )
{
    int prio;
    pid_t pid;
    if( argc != 2 )
    {
        fprintf(stderr,"Usage: %% %s pid\n",argv[0] );
        exit( 1 );
    }
    pid = strtol( argv[1], NULL, 10 );
    prio = getpriority( PRIO_PROCESS, pid );
    printf("pid==%d, priority==%d\n", pid, prio);
    return 0;
}
$ ./getpriority-pid
Usage: % ./getpriority-pid pid
$ echo $$
21150
$ ./getpriority-pid
Usage: % ./getpriority-pid pid
$ ./getpriority-pid $$
pid==21150, priority==0
$ ./getpriority-pid 0
pid==0, priority==0
$ /bin/nice -10 ./getpriority-pid 0
pid==0, priority==10
$ /bin/nice -20 ./getpriority-pid 0
pid==0, priority==19
$
linux-6.18.2/include/linux/sched.h
819: struct task_struct {
...
827: unsigned int __state;
...
865: int prio;
866: int static_prio;
867: int normal_prio;
868: unsigned int rt_priority;
869:
870: struct sched_entity se;
871: struct sched_rt_entity rt;
872: struct sched_dl_entity dl;
...
877: const struct sched_class *sched_class;
...
919: unsigned int policy;
...
1668: } __attribute__ ((aligned (64)));
570: struct sched_entity {
571: /* For load-balancing: */
572: struct load_weight load;
573: struct rb_node run_node;
574: u64 deadline;
...
579: unsigned char on_rq;
...
585: u64 exec_start;
586: u64 sum_exec_runtime;
587: u64 prev_sum_exec_runtime;
588: u64 vruntime;
...
595: s64 vlag;
...
455: struct load_weight {
456: unsigned long weight;
457: u32 inv_weight;
458: };
struct task_struct contains fields such as prio as well as a struct sched_entity.
The excerpt above shows the important fields of struct sched_entity; they are described in detail later.
linux-6.18.2/include/uapi/linux/sched.h
114: #define SCHED_NORMAL 0
115: #define SCHED_FIFO 1
116: #define SCHED_RR 2
117: #define SCHED_BATCH 3
118: /* SCHED_ISO: reserved but not implemented yet */
119: #define SCHED_IDLE 5
120: #define SCHED_DEADLINE 6
121: #define SCHED_EXT 7
linux-6.18.2/kernel/sys.c
329: SYSCALL_DEFINE2(getpriority, int, which, int, who)
330: {
331: struct task_struct *g, *p;
332: struct user_struct *user;
333: const struct cred *cred = current_cred();
334: long niceval, retval = -ESRCH;
335: struct pid *pgrp;
336: kuid_t uid;
...
342: switch (which) {
343: case PRIO_PROCESS:
344: if (who)
345: p = find_task_by_vpid(who);
346: else
347: p = current;
348: if (p) {
349: niceval = nice_to_rlimit(task_nice(p));
350: if (niceval > retval)
351: retval = niceval;
352: }
353: break;
354: case PRIO_PGRP:
...
367: case PRIO_USER:
...
383: }
...
391: return retval;
392: }
linux-6.18.2/include/linux/sched/prio.h
5: #define MAX_NICE 19
6: #define MIN_NICE -20
7: #define NICE_WIDTH (MAX_NICE - MIN_NICE + 1)
...
16: #define MAX_RT_PRIO 100
...
19: #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
...
27: #define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO)
28: #define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO)
...
33: static inline long nice_to_rlimit(long nice)
34: {
35: return (MAX_NICE - nice + 1);
36: }
linux-6.18.2/include/linux/sched.h
1891: static inline int task_nice(const struct task_struct *p)
1892: {
1893: return PRIO_TO_NICE((p)->static_prio);
1894: }
glibc-2.35/sysdeps/unix/sysv/linux/getpriority.c
27: #define PZERO 20
...
34: int
35: __getpriority (enum __priority_which which, id_t who)
36: {
37: int res;
38:
39: res = INLINE_SYSCALL (getpriority, 2, (int) which, who);
40: if (res >= 0)
41: res = PZERO - res;
42: return res;
43: }
44: libc_hidden_def (__getpriority)
45: weak_alias (__getpriority, getpriority)
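Putting the two halves together: for a nice value n, the kernel returns nice_to_rlimit(n) = MAX_NICE - n + 1 = 20 - n, which is always in 1..40 and therefore never collides with a negative error code. glibc then converts back with PZERO - res = 20 - (20 - n) = n. This round trip is why the transcript above prints priority==10 under /bin/nice -10, even though the system call itself never returns a negative nice value.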
linux-6.18.2/kernel/sched/core.c
10330: /*
10331: * Nice levels are multiplicative, with a gentle 10% change for every
10332: * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
10333: * nice 1, it will get ~10% less CPU time than another CPU-bound task
10334: * that remained on nice 0.
10335: *
10336: * The "10% effect" is relative and cumulative: from _any_ nice level,
10337: * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
10338: * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
10339: * If a task goes up by ~10% and another task goes down by ~10% then
10340: * the relative distance between them is ~25%.)
10341: */
10342: const int sched_prio_to_weight[40] = {
10343: /* -20 */ 88761, 71755, 56483, 46273, 36291,
10344: /* -15 */ 29154, 23254, 18705, 14949, 11916,
10345: /* -10 */ 9548, 7620, 6100, 4904, 3906,
10346: /* -5 */ 3121, 2501, 1991, 1586, 1277,
10347: /* 0 */ 1024, 820, 655, 526, 423,
10348: /* 5 */ 335, 272, 215, 172, 137,
10349: /* 10 */ 110, 87, 70, 56, 45,
10350: /* 15 */ 36, 29, 23, 18, 15,
10351: };
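A quick check of the "10% effect" with these weights: two CPU-bound tasks at nice 0 get 1024/(1024+1024) = 50% each. If one of them moves to nice 1 (weight 820), the shares become 1024/1844 ≈ 55.5% and 820/1844 ≈ 44.5%, so the demoted task now gets roughly 10% less CPU time than before, and each adjacent pair of weights keeps a ratio of about 1.25 (for example, 1024/820 ≈ 1.25).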
1432: void set_load_weight(struct task_struct *p, bool update_load)
1433: {
1434: int prio = p->static_prio - MAX_RT_PRIO;
1435: struct load_weight lw;
...
1441: lw.weight = scale_load(sched_prio_to_weight[prio]);
1442: lw.inv_weight = sched_prio_to_wmult[prio];
...
1452: p->se.load = lw;
1453: }
linux-6.18.2/include/linux/sched.h
448: # define SCHED_FIXEDPOINT_SHIFT 10
449: # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
linux-6.18.2/kernel/sched/sched.h
146: #ifdef CONFIG_64BIT
147: # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
148: # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
...
157: #else
158: # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
159: # define scale_load(w) (w)
160: # define scale_load_down(w) (w)
161: #endif
| Name | Description |
|---|---|
| enqueue_task | A process became runnable |
| dequeue_task | A process is no longer runnable |
| yield_task | Yield the CPU; dequeue then enqueue |
| check_preempt_curr | Check whether a newly runnable process should preempt the current one |
| pick_next_task | Choose the next process to run |
| set_curr_task | The scheduling class was changed |
| task_tick | Called on each timer interrupt (tick) |
| task_new | A new process was created |
linux-6.18.2/kernel/sched/sched.h
2398: struct sched_class {
...
2404: void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
2405: bool (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
...
2412: struct task_struct *(*pick_task)(struct rq *rq);
...
2440: void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
...
2469: };
linux-6.18.2/kernel/sched/core.c
2080: void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
2081: {
...
2092: p->sched_class->enqueue_task(rq, p, flags);
...
2101: }
2106: inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
2107: {
...
2124: return p->sched_class->dequeue_task(rq, p, flags);
2125: }
linux-6.18.2/kernel/sched/syscalls.c
513: int __sched_setscheduler(struct task_struct *p,
514: const struct sched_attr *attr,
515: bool user, bool pi)
516: {
...
696: next_class = __setscheduler_class(policy, newprio);
...
709: __setscheduler_params(p, attr);
710: p->sched_class = next_class;
711: p->prio = newprio;
...
753: }
290: static void __setscheduler_params(struct task_struct *p,
291: const struct sched_attr *attr)
292: {
293: int policy = attr->sched_policy;
294:
295: if (policy == SETPARAM_POLICY)
296: policy = p->policy;
297:
298: p->policy = policy;
299:
300: if (dl_policy(policy))
...
302: else if (fair_policy(policy))
303: __setparam_fair(p, attr);
...
319: p->normal_prio = normal_prio(p);
320: set_load_weight(p, true);
321: }
linux-6.18.2/kernel/sched/core.c
7272: const struct sched_class *__setscheduler_class(int policy, int prio)
7273: {
7274: if (dl_prio(prio))
7275: return &dl_sched_class;
7276:
7277: if (rt_prio(prio))
7278: return &rt_sched_class;
7279:
7280: #ifdef CONFIG_SCHED_CLASS_EXT
7281: if (task_should_scx(policy))
7282: return &ext_sched_class;
7283: #endif
7284:
7285: return &fair_sched_class;
7286: }
Depending on the value of p->prio (and the policy), p->sched_class is set to point to one of
&dl_sched_class,
&rt_sched_class,
&ext_sched_class, or
&fair_sched_class.
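From user space, the policy (and therefore the sched_class chosen above) is changed with the sched_setscheduler() system call. A sketch that puts the calling process into SCHED_FIFO (requires root or CAP_SYS_NICE; the priority value 50 is arbitrary):

#include <sched.h>
#include <stdio.h>

int main(void)
{
    struct sched_param param = { .sched_priority = 50 }; /* RT priority 1..99 */
    if (sched_setscheduler(0, SCHED_FIFO, &param) == -1) { /* 0 = this process */
        perror("sched_setscheduler");
        return 1;
    }
    printf("now scheduled with SCHED_FIFO\n");
    return 0;
}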
When multiple CPU-bound processes exist, a period is defined, and during this period CPU resources are allocated fairly (taking priorities into account). The scheduler tries hard to give each process the CPU at least once within the period. The period is set by kernel.sched_latency_ns; in the example below it is 15 milliseconds.
$ sysctl kernel.sched_latency_ns
kernel.sched_latency_ns = 15000000
$
For example, if processes A and B have the same priority, the scheduler tries to allocate 7.5 milliseconds of CPU time to A and 7.5 milliseconds to B within the 15 milliseconds.
Linux CFS schedules by always running the runnable task with the smallest vruntime, where each task's vruntime advances in proportion to its actual runtime scaled by its weight.
The goal of the EEVDF scheduler is the same as that of CFS: to divide the available CPU time evenly among all runnable tasks in the system. EDF (Earliest Deadline First) is a real-time scheduling method; despite the similar name, EEVDF aims at fairness.
Example: three processes A, B, and C with equal weights and a slice of 30 time units. Each time one process runs for 30 units, every process earns 10 units (1/3) of entitlement; lag is the entitlement minus the CPU time actually received.

Initial state:

| Process | A | B | C |
|---|---|---|---|
| Lag | 0 | 0 | 0 |
| vruntime | 0 | 0 | 0 |
| deadline | 30 | 30 | 30 |

After A runs for 30:

| Process | A | B | C |
|---|---|---|---|
| Lag | -20 | 10 | 10 |
| vruntime | 30 | 0 | 0 |
| deadline | 60 | 30 | 30 |

After B runs for 30:

| Process | A | B | C |
|---|---|---|---|
| Lag | -10 | -10 | 20 |
| vruntime | 30 | 60 | 0 |
| deadline | 60 | 90 | 30 |

After C runs for 30:

| Process | A | B | C |
|---|---|---|---|
| Lag | 0 | 0 | 0 |
| vruntime | 30 | 60 | 90 |
| deadline | 60 | 90 | 120 |
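The tables can be re-enacted with a toy model (an illustration of the bookkeeping in the example above, not the kernel's actual code; all names are hypothetical): an eligible task has lag >= 0, and among eligible tasks the one with the earliest deadline runs for one slice.

#include <stdio.h>

#define NTASK 3
#define SLICE 30

int main(void)
{
    long lag[NTASK] = {0, 0, 0};
    long vrt[NTASK] = {0, 0, 0};
    long dl[NTASK]  = {SLICE, SLICE, SLICE};
    long vtime = 0; /* global virtual time */
    for (int step = 0; step < NTASK; step++) {
        int pick = -1;
        for (int i = 0; i < NTASK; i++) /* earliest eligible deadline */
            if (lag[i] >= 0 && (pick < 0 || dl[i] < dl[pick]))
                pick = i;
        vtime += SLICE; /* the picked task runs for one slice */
        vrt[pick] = vtime;
        dl[pick] = vrt[pick] + SLICE;
        for (int i = 0; i < NTASK; i++) /* everyone earns SLICE/NTASK */
            lag[i] += SLICE / NTASK;
        lag[pick] -= SLICE; /* the runner consumed SLICE */
        printf("after %c:", 'A' + pick);
        for (int i = 0; i < NTASK; i++)
            printf("  %c: lag=%3ld vrt=%3ld dl=%3ld",
                   'A' + i, lag[i], vrt[i], dl[i]);
        printf("\n");
    }
    return 0;
}

Running it prints exactly the three transitions shown in the tables (A runs first, then B, then C).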
The Linux Kernel: EEVDF Scheduler, https://docs.kernel.org/scheduler/sched-eevdf.html
LWN.net: Completing the EEVDF scheduler, https://lwn.net/Articles/969062/

Figure: Structure of the runqueue
linux-6.18.2/kernel/sched/core.c
123: DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
linux-6.18.2/kernel/sched/sched.h
1119: struct rq {
...
1147: struct cfs_rq cfs;
1148: struct rt_rq rt;
1149: struct dl_rq dl;
1150: #ifdef CONFIG_SCHED_CLASS_EXT
1151: struct scx_rq scx;
1152: #endif
...
1321: };
675: struct cfs_rq {
...
691: struct rb_root_cached tasks_timeline;
...
697: struct sched_entity *curr;
...
771: };
linux-6.18.2/kernel/sched/sched.h
2517: #define DEFINE_SCHED_CLASS(name) \
2518: const struct sched_class name##_sched_class \
2519: __aligned(__alignof__(struct sched_class)) \
2520: __section("__" #name "_sched_class")
linux-6.18.2/kernel/sched/fair.c
13636: DEFINE_SCHED_CLASS(fair) = {
13637:
13638: .enqueue_task = enqueue_task_fair,
13639: .dequeue_task = dequeue_task_fair,
...
13645: .pick_task = pick_task_fair,
...
13660: .task_tick = task_tick_fair,
...
13670: .update_curr = update_curr_fair,
...
13683: };
linux-6.18.2/kernel/sched/fair.c
6920: static void
6921: enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
6922: {
...
6976: enqueue_entity(cfs_rq, se, flags);
7039: }
5241: static void
5242: enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
5243: {
5244: bool curr = cfs_rq->curr == se;
...
5250: if (curr)
5251: place_entity(cfs_rq, se, flags);
...
5253: update_curr(cfs_rq);
...
5277: if (!curr)
5278: place_entity(cfs_rq, se, flags);
...
5288: if (!curr)
5289: __enqueue_entity(cfs_rq, se);
5290: se->on_rq = 1;
...
5305: }
5127: static void
5128: place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
5129: {
5130: u64 vslice, vruntime = avg_vruntime(cfs_rq);
...
5133: if (!se->custom_slice)
5134: se->slice = sysctl_sched_base_slice;
5135: vslice = calc_delta_fair(se->slice, se);
...
5213: se->vruntime = vruntime - lag;
...
5232: se->deadline = se->vruntime + vslice;
5233: }
1257: static void update_curr_fair(struct rq *rq)
1258: {
1259: update_curr(cfs_rq_of(&rq->donor->se));
1260: }
1207: static void update_curr(struct cfs_rq *cfs_rq)
1208: {
...
1223: delta_exec = update_se(rq, curr);
...
1227: curr->vruntime += calc_delta_fair(delta_exec, curr);
1228: resched = update_deadline(cfs_rq, curr);
1229: update_min_vruntime(cfs_rq);
...
1248: if (cfs_rq->nr_queued == 1)
1249: return;
1250:
1251: if (resched || !protect_slice(curr)) {
1252: resched_curr_lazy(rq);
1253: clear_buddies(cfs_rq, curr);
1254: }
1255: }
1155: static s64 update_se(struct rq *rq, struct sched_entity *se)
1156: {
1157: u64 now = rq_clock_task(rq);
1158: s64 delta_exec;
1159:
1160: delta_exec = now - se->exec_start;
...
1164: se->exec_start = now;
...
1166: struct task_struct *donor = task_of(se);
1167: struct task_struct *running = rq->curr;
...
1172: running->se.exec_start = now;
1173: running->se.sum_exec_runtime += delta_exec;
...
1193: return delta_exec;
1194: }
1041: static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
1042: {
1043: if ((s64)(se->vruntime - se->deadline) < 0)
1044: return false;
...
1057: se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
...
1062: return true;
1063: }
290: static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
291: {
292: if (unlikely(se->load.weight != NICE_0_LOAD))
293: delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
294:
295: return delta;
296: }
260: static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight *lw)
261: {
...
285: }
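In effect, calc_delta_fair() scales real runtime by NICE_0_LOAD/weight. For example, a task at nice -5 (weight 3121) that runs for 10 ms accumulates only 10 ms × 1024/3121 ≈ 3.3 ms of vruntime, while a task at nice 5 (weight 335) accumulates 10 ms × 1024/335 ≈ 30.6 ms; tasks with larger weights therefore advance more slowly in virtual time and are chosen more often.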
5373: dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
5374: {
...
5425: if (se != cfs_rq->curr)
5426: __dequeue_entity(cfs_rq, se);
5461: }

Figure: Structure of the runqueue (red-black tree)
linux-6.18.2/kernel/sched/fair.c
848: static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
849: {
...
851: se->min_vruntime = se->vruntime;
...
853: rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
854: __entity_less, &min_vruntime_cb);
855: }
796: static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
797: {
798: return entity_before(__node_2_se(a), __node_2_se(b));
799: }
545: static inline bool entity_before(const struct sched_entity *a,
546: const struct sched_entity *b)
547: {
548: /*
549: * Tiebreak on vruntime seems unnecessary since it can
550: * hardly happen.
551: */
552: return (s64)(a->deadline - b->deadline) < 0;
553: }
857: static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
858: {
859: rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
860: &min_vruntime_cb);
...
862: }
linux-6.18.2/include/linux/rbtree_augmented.h
63: static __always_inline struct rb_node *
64: rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
65: bool (*less)(struct rb_node *, const struct rb_node *),
66: const struct rb_augment_callbacks *augment)
67: {
68: struct rb_node **link = &tree->rb_root.rb_node;
69: struct rb_node *parent = NULL;
70: bool leftmost = true;
71:
72: while (*link) {
73: parent = *link;
74: if (less(node, parent)) {
75: link = &parent->rb_left;
76: } else {
77: link = &parent->rb_right;
78: leftmost = false;
79: }
80: }
81:
82: rb_link_node(node, parent, link);
83: augment->propagate(parent, NULL); /* suboptimal */
84: rb_insert_augmented_cached(node, tree, leftmost, augment);
85:
86: return leftmost ? node : NULL;
87: }
linux-6.18.2/include/linux/rbtree.h
59: static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
60: struct rb_node **rb_link)
61: {
...
63: node->rb_left = node->rb_right = NULL;
64:
65: *rb_link = node;
66: }
If the node being inserted is smaller, descend to the left (&parent->rb_left); if it is larger, descend to the right (&parent->rb_right).
In EEVDF, a mechanism called donation allows a process to run earlier using time borrowed from another process.
linux-6.18.2/kernel/sched/core.c
5573: void sched_tick(void)
5574: {
5575: int cpu = smp_processor_id();
5576: struct rq *rq = cpu_rq(cpu);
5578: struct task_struct *donor;
...
5589: donor = rq->donor;
...
5600: donor->sched_class->task_tick(rq, donor, 0);
...
5622: }
linux-6.18.2/kernel/sched/fair.c
13129: static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
13130: {
13131: struct cfs_rq *cfs_rq;
13132: struct sched_entity *se = &curr->se;
...
13135: cfs_rq = cfs_rq_of(se);
13136: entity_tick(cfs_rq, se, queued);
...
13146: }
5564: static void
5565: entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
5566: {
...
5570: update_curr(cfs_rq);
...
5588: }
linux-6.6.9/kernel/sched/fair.c
12480: static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
12481: {
12482: struct cfs_rq *cfs_rq;
12483: struct sched_entity *se = &curr->se;
12484:
12485: for_each_sched_entity(se) {
12486: cfs_rq = cfs_rq_of(se);
12487: entity_tick(cfs_rq, se, queued);
12488: }
...
12497: }
5395: static void
5396: entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
5397: {
...
5401: update_curr(cfs_rq);
...
5425: }
1150: static void update_curr(struct cfs_rq *cfs_rq)
1151: {
1152: struct sched_entity *curr = cfs_rq->curr;
1153: u64 now = rq_clock_task(rq_of(cfs_rq));
1154: u64 delta_exec;
...
1159: delta_exec = now - curr->exec_start;
...
1163: curr->exec_start = now;
...
1173: curr->sum_exec_runtime += delta_exec;
...
1176: curr->vruntime += calc_delta_fair(delta_exec, curr);
...
1189: }
311: static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
312: {
313: if (unlikely(se->load.weight != NICE_0_LOAD))
314: delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
315:
316: return delta;
317: }
$ cat /proc/sched_debug
Sched Debug Version: v0.11, 4.18.0-477.15.1.el8_8.x86_64 #1
...
jiffies : 8037018588
...
sysctl_sched
.sysctl_sched_latency : 18.000000
.sysctl_sched_min_granularity : 10.000000
...
cpu#0, 3092.734 MHz
.nr_running : 0
...
.curr->pid : 0
...
cfs_rq[0]:/system.slice/shibd.service
...
.se->exec_start : 3740976147.098853
.se->vruntime : 16904453.116095
.se->sum_exec_runtime : 488423.358201
cfs_rq[0]:/system.slice/mariadb.service
...
.se->exec_start : 3740976147.098853
.se->vruntime : 16904453.116095
.se->sum_exec_runtime : 488423.358201
...
cfs_rq[0]:/system.slice/httpd.service
...
.se->exec_start : 3740976409.981736
.se->vruntime : 16904452.748796
.se->sum_exec_runtime : 467578.885170
...
runnable tasks:
S task PID tree-key switches prio wait-time sum-exec sum-sleep
-------------------------------------------------------------------------------------------------------------
...
S shibd 2029016 36399.620037 1312918 120 0.000000 17147.974128 0.000000 0 0 /system.slice/shibd.service
...
S mysqld 3062817 223694.881335 2877 120 0.000000 377.018340 0.000000 0 0 /system.slice/mariadb.service
...
S httpd 1887526 304759.136719 29475 120 0.000000 4818.694375 0.000000 0 0 /system.slice/httpd.service
...
>R cat 3065061 23683938.282852 1 120 0.000000 2.674255 0.000000 0 0 /
...
cpu#1, 3092.734 MHz
..
cpu#2, 3092.734 MHz
...
cpu#3, 3092.734 MHz
...
$ cat /proc/self/sched
cat (3065430, #threads: 1)
-------------------------------------------------------------------
se.exec_start : 3736477629.503557
se.vruntime : 24389027.685650
se.sum_exec_runtime : 0.463870
...
se.load.weight : 1048576
...
policy : 0
prio : 120
...
$
void h(int a,int b, int c) {
....
}
To arrange for h() to be called later from a timer, what code should you write? Fill in the blanks below.
struct timer_list my_timer;
int my_arg_a,my_arg_b,my_arg_c;
void f(unsigned long data) {
timer_setup( /*空欄(a)*/, /*空欄(b)*/, 0);
my_timer.expires = /*空欄(c)*/;
/*空欄(d)*/;
}
void my_timer_func(/*省略*/) {
h( my_arg_a,my_arg_b,my_arg_c );
}
Also, when the policy is SCHED_NORMAL, give the function that is called as sched_class->enqueue_task() under EEVDF.

Figure: A list structure with four elements
Note: more than one correct binary search tree exists.