MIT 6.S081入门lab6 cow
由于本实验的前置课程包括2部分 Interrupts和 Multiprocessors and locking,因此本次实验记录也分为2部分
一、参考资料阅读与总结
1.xv6 book书籍阅读(chapter 5 Interrupts and device drivers)
1. 概述
设备驱动程序: 位置:操作系统;作用:配置设备,执行操作,处理中断,并与上层进行交互;核心:驱动程序和其管理的设备是并发执行的
中断: 是引起trap的其中一种方式,在xv6中,中断经过中断管理程序devintr(kernel/trap.c),再到具体设备的中断处理程序
常见的中断形式: 上半部中断内核线程中运行,其本质上就是一种系统调用,调用设备的操作,下半部分在中断时候执行,即设备操作完成后,唤醒进程,同时让设备开始新工作。注意: 上半部和下半部是需要其并法的运行在多CPU上的。
2.代码:控制台输入
-
控制台: 通过UART串口硬件接受字符;处理特殊字符(Backpace和Ctrl等);到达完整的一行,之后用户进程如shell,使用read读取输入,并进行处理;
-
xv6实现: QEMU仿真的16550芯片
-
内核访问: 内存映射的UART控制器,并连接到相应的物理地址,对这些物理地址进行读写操作;UART的内存映射地址起始于0x10000000或UART0 (kernel/memlayout.h:21),控制器偏移量在(kernel/uart.c:22)中定义
kernel/uart.c:13
... / the UART control registers are memory-mapped // at address UART0. this macro returns the // address of one of the registers. #define Reg(reg) ((volatile unsigned char *)(UART0 + reg)) //宏定义地址 // the UART control registers. // some have different meanings for // read vs write. // see http://byterunner.com/16550.html 定义寄存器地址 #define RHR 0 // receive holding register (for input bytes) #define THR 0 // transmit holding register (for output bytes) #define IER 1 // interrupt enable register #define IER_TX_ENABLE (1<<0) #define IER_RX_ENABLE (1<<1) #define FCR 2 // FIFO control register #define FCR_FIFO_ENABLE (1<<0) #define FCR_FIFO_CLEAR (3<<1) // clear the content of the two FIFOs #define ISR 2 // interrupt status register #define LCR 3 // line control register #define LCR_EIGHT_BITS (3<<0) #define LCR_BAUD_LATCH (1<<7) // special mode to set baud rate #define LSR 5 // line status register #define LSR_RX_READY (1<<0) // input is waiting to be read from RHR #define LSR_TX_IDLE (1<<5) // THR can accept another character to send #define ReadReg(reg) (*(Reg(reg))) #define WriteReg(reg, v) (*(Reg(reg)) = (v)) ...
-
硬件初始化: consoleinit(kernel/console.c:184)调用 uartinit (kernel/uart.c:53)对每一个接受到的字节生成一个接受中断、发送完成的字节输出一个发送完成中断
kernel/console.c:183
void consoleinit(void) { initlock(&cons.lock, "cons"); uartinit(); // connect read and write system calls // to consoleread and consolewrite. devsw[CONSOLE].read = consoleread; devsw[CONSOLE].write = consolewrite; }
kernel/uart.c:52
void uartinit(void) { // disable interrupts. WriteReg(IER, 0x00); // special mode to set baud rate. WriteReg(LCR, LCR_BAUD_LATCH); // LSB for baud rate of 38.4K. WriteReg(0, 0x03); // MSB for baud rate of 38.4K. WriteReg(1, 0x00); // leave set-baud mode, // and set word length to 8 bits, no parity. WriteReg(LCR, LCR_EIGHT_BITS); // reset and enable FIFOs. WriteReg(FCR, FCR_FIFO_ENABLE | FCR_FIFO_CLEAR); // enable transmit and receive interrupts. WriteReg(IER, IER_TX_ENABLE | IER_RX_ENABLE); initlock(&uart_tx_lock, "uart"); }
-
shell读取通路流程: init.c (user/init.c:19)中打开的文件描述符从控制台读取输入
-> main(user/shell.c:145) -> 调用getcmd(user/shell.c:143) -> 调用gets(user/ulib.c: 56) -> 调用read
->用户键盘输入一个字符
->UART在RHR中接收到该字符,发出中断
->xv6接收到中断,陷入trap->trap handler发现是外部设备中断,进入devintr,设备是UART
->调用uartintr->调用uartgetc读取字符,发现RHR中有字符可读
->调用consoleintr:将输入字符缓冲到cons.buf中,如果读到'\n'或'ctrl+D',说明用户输入满足一行,就唤醒consoleread
->调用consoleread 读出一整行的用户输入,拷贝到用户空间中kernel/trap.c devintr
// check if it's an external interrupt or software interrupt, // and handle it. // returns 2 if timer interrupt, // 1 if other device, // 0 if not recognized. int devintr() { uint64 scause = r_scause(); if((scause & 0x8000000000000000L) && (scause & 0xff) == 9){ // this is a supervisor external interrupt, via PLIC. // irq indicates which device interrupted. int irq = plic_claim(); if(irq == UART0_IRQ){ uartintr(); } else if(irq == VIRTIO0_IRQ){ virtio_disk_intr(); } else if(irq){ printf("unexpected interrupt irq=%d\n", irq); } // the PLIC allows each device to raise at most one // interrupt at a time; tell the PLIC the device is // now allowed to interrupt again. if(irq) plic_complete(irq); return 1; } else if(scause == 0x8000000000000001L){ // software interrupt from a machine-mode timer interrupt, // forwarded by timervec in kernelvec.S. if(cpuid() == 0){ clockintr(); } // acknowledge the software interrupt by clearing // the SSIP bit in sip. w_sip(r_sip() & ~2); return 2; } else { return 0; } }
kernel/uart.c uartintr
// handle a uart interrupt, raised because input has // arrived, or the uart is ready for more output, or // both. called from trap.c. void uartintr(void) { // read and process incoming characters. while(1){ int c = uartgetc(); if(c == -1) break; consoleintr(c); } // send buffered characters. acquire(&uart_tx_lock); uartstart(); release(&uart_tx_lock); }
kernel/uart.c uartgetc
// read one input character from the UART. // return -1 if none is waiting. int uartgetc(void) { if(ReadReg(LSR) & 0x01){ // input data is ready. return ReadReg(RHR); } else { return -1; } }
kernel/console.c cons
struct { struct spinlock lock; // input #define INPUT_BUF 128 char buf[INPUT_BUF]; uint r; // Read index uint w; // Write index uint e; // Edit index } cons; ...
kernel/console.c consoleintr
... void consoleintr(int c) { acquire(&cons.lock); switch(c){ case C('P'): // Print process list. procdump(); break; case C('U'): // Kill line. while(cons.e != cons.w && cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){ cons.e--; consputc(BACKSPACE); } break; case C('H'): // Backspace case '\x7f': if(cons.e != cons.w){ cons.e--; consputc(BACKSPACE); } break; default: if(c != 0 && cons.e-cons.r < INPUT_BUF){ c = (c == '\r') ? '\n' : c; // echo back to the user. consputc(c); // store for consumption by consoleread(). cons.buf[cons.e++ % INPUT_BUF] = c; if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){ // wake up consoleread() if a whole line (or end-of-file) // has arrived. cons.w = cons.e; wakeup(&cons.r); } } break; } release(&cons.lock); } ...
kernel/console.c consoleread
... // // user read()s from the console go here. // copy (up to) a whole input line to dst. // user_dist indicates whether dst is a user // or kernel address. // int consoleread(int user_dst, uint64 dst, int n) { uint target; int c; char cbuf; target = n; acquire(&cons.lock); while(n > 0){ // wait until interrupt handler has put some // input into cons.buffer. while(cons.r == cons.w){ if(myproc()->killed){ release(&cons.lock); return -1; } sleep(&cons.r, &cons.lock); } c = cons.buf[cons.r++ % INPUT_BUF]; if(c == C('D')){ // end-of-file if(n < target){ // Save ^D for next time, to make sure // caller gets a 0-byte result. cons.r--; } break; } // copy the input byte to the user-space buffer. cbuf = c; if(either_copyout(user_dst, dst, &cbuf, 1) == -1) break; dst++; --n; if(c == '\n'){ // a whole line has arrived, return to // the user-level read(). break; } } release(&cons.lock); return target - n; } // // the console input interrupt handler. // uartintr() calls this for input character. // do erase/kill processing, append to cons.buf, // wake up consoleread() if a whole line has arrived. // ...
3.代码 控制台输出:
-
控制台输出流程:
用户侧执行write系统调用(往往是使用prinf,经过一系列分类之后调用putc,putc调用write)
-> 系统调用sys_write(kernel/sysfile.c:82) -> 调用filewrite(kernel/file.c:135) ->
->->经过文件系统导向UART驱动程序的上半部分
->调用uartputc(kernel/uart.c:87)向缓冲输入数据
->调用uartstart启动传输(FIFO)注:当发送完成时候,会尝试通知休眠中的uartputc进程
->完成传输触发中段,引起trap,最后跳转进入uartintr ->调用uartstart发送更多字符,直到传输完成。kernel/uart.c:41 发送缓冲区
// the transmit output buffer. struct spinlock uart_tx_lock; #define UART_TX_BUF_SIZE 32 char uart_tx_buf[UART_TX_BUF_SIZE]; int uart_tx_w; // write next to uart_tx_buf[uart_tx_w++] int uart_tx_r; // read next from uart_tx_buf[uar_tx_r++]
kernel/uart.c uartputc
// add a character to the output buffer and tell the // UART to start sending if it isn't already. // blocks if the output buffer is full. // because it may block, it can't be called // from interrupts; it's only suitable for use // by write(). void uartputc(int c) { acquire(&uart_tx_lock); if(panicked){ for(;;) ; } while(1){ if(((uart_tx_w + 1) % UART_TX_BUF_SIZE) == uart_tx_r){ // buffer is full. // wait for uartstart() to open up space in the buffer. sleep(&uart_tx_r, &uart_tx_lock); } else { uart_tx_buf[uart_tx_w] = c; uart_tx_w = (uart_tx_w + 1) % UART_TX_BUF_SIZE; uartstart(); release(&uart_tx_lock); return; } } }
kernel/uart.c uartstart
// if the UART is idle, and a character is waiting // in the transmit buffer, send it. // caller must hold uart_tx_lock. // called from both the top- and bottom-half. void uartstart() { while(1){ if(uart_tx_w == uart_tx_r){ // transmit buffer is empty. return; } if((ReadReg(LSR) & LSR_TX_IDLE) == 0){ // the UART transmit holding register is full, // so we cannot give it another byte. // it will interrupt when it's ready for a new byte. return; } int c = uart_tx_buf[uart_tx_r]; uart_tx_r = (uart_tx_r + 1) % UART_TX_BUF_SIZE; // maybe uartputc() is waiting for space in the buffer. wakeup(&uart_tx_r); WriteReg(THR, c); } }
-
本质上是通过缓冲区和中断机制将设备活动和进程活动解耦,实现了I/O并发,由于uartstart几乎是无阻塞的,因此实现了uartputc的异步;
-
同时也存在阻塞版本,这被用于内核的printf中:
kernel/uart.c uartputc_sync
// alternate version of uartputc() that doesn't // use interrupts, for use by kernel printf() and // to echo characters. it spins waiting for the uart's // output register to be empty. void uartputc_sync(int c) { push_off(); if(panicked){ for(;;) ; }
3.驱动中的并发
- 在consoleread和consoleintr中,在处理可能存在的并发访问时,使用了锁来保护相应的资源(使用的是自旋锁)
- 锁所避免的潜在问题:
两个不同CPU上的进程同时调用consoleread。
即使CPU已经在执行consoleread,但硬件要求该CPU响应一个控制台(UART)的中断。
当consoleread执行时,硬件可能会在不同的CPU上响应一个控制台(UART)中断。 - 中断应存在的机制:处理程序尽可能的少,通过唤醒特定的进程完成之后的工作。
4.定时器中断
-
xv6的进程调度策略采用了基于时间片的进程切换机制,通过yield来实现进程调度
-
由于定时其中断在机器模式下运行,因此与trap机制不同
-
定时器中断: 在timerinit(kerenl/start.c:57)初始化,是实现了硬件编程(设定延时)+(设置scratch、类似于trapframe) + (设置mtvec为timervec)+使能定时器中断。
kerenl/start.c timerinit
// set up to receive timer interrupts in machine mode, // which arrive at timervec in kernelvec.S, // which turns them into software interrupts for // devintr() in trap.c. void timerinit() { // each CPU has a separate source of timer interrupts. int id = r_mhartid(); // ask the CLINT for a timer interrupt. int interval = 1000000; // cycles; about 1/10th second in qemu. *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + interval; // prepare information in scratch[] for timervec. // scratch[0..3] : space for timervec to save registers. // scratch[4] : address of CLINT MTIMECMP register. // scratch[5] : desired interval (in cycles) between timer interrupts. uint64 *scratch = &mscratch0[32 * id]; scratch[4] = CLINT_MTIMECMP(id); scratch[5] = interval; w_mscratch((uint64)scratch); // set the machine-mode trap handler. w_mtvec((uint64)timervec); // enable machine-mode interrupts. w_mstatus(r_mstatus() | MSTATUS_MIE); // enable machine-mode timer interrupts. w_mie(r_mie() | MIE_MTIE); }
-
计时器中断设定核心: 保证不干扰中断的内核代码(透明机制)
-
计时器中断设计策略: 发出软件中断,立即返回(timervec)保存寄存器->使用寄存器更新计时器(喂狗)并触发软件中断-> 恢复寄存器;
kernel/kernelvec.S timervec
# # machine-mode timer interrupt. # .globl timervec .align 4 timervec: # start.c has set up the memory that mscratch points to: # scratch[0,8,16] : register save area. # scratch[32] : address of CLINT's MTIMECMP register. # scratch[40] : desired interval between interrupts. csrrw a0, mscratch, a0 sd a1, 0(a0) sd a2, 8(a0) sd a3, 16(a0) # schedule the next timer interrupt # by adding interval to mtimecmp. ld a1, 32(a0) # CLINT_MTIMECMP(hart) ld a2, 40(a0) # interval ld a3, 0(a1) add a3, a3, a2 sd a3, 0(a1) # raise a supervisor software interrupt. li a1, 2 csrw sip, a1 ld a3, 16(a0) ld a2, 8(a0) ld a1, 0(a0) csrrw a0, mscratch, a0 mret
5.现实世界:
- 由于内核代码可能被挂起,然后在不同的CPU上恢复,因此xv6内核代码比较复杂,但如果设备和计时器中断只在用户空间发生,代码可以被简化;
- 通常来说,驱动程序比核心内核占用了更多的代码量;
- UART在xv6中使用的是Programmed I/O,即使用软件来驱动数据传输,但是其难用于高速率传输;现代多使用DMA传输,即在RAM中准备数据,使用对寄存器的单次写入同时设备
- 中断有着较高的cpu开销,但是对高速设备,其多采用polling机制,即禁用中断,定期检查(但是如果空闲会浪费cpu时间)。
2.xv6 book书籍阅读(Chapter 6 Locking)
1.概述
- 锁所解决的操作系统问题: 并发;
- 锁所存在的缺陷: 锁的本质是将并行操作串行化,其有着扼杀性能的缺点
- 并发控制技术(同步工具):用于实现线程使用的,一种兼具并发性和正确性的抽象设计,是一种互斥原语;
常用的同步工具:锁Lock,条件变量Conditional Variable,信号量Semaphore等 - 锁的作用:
使用锁有助于避免更新的丢失;
使用锁使多步操作变为原子性的操作;
使用锁有助于维持一些规则或特性的不变性;
2.竞态条件
- 基本概念:
临界区Critical Section:访问共享资源的代码段
竞争条件Race Condition:多个执行线程同时进入临界区并尝试更新共享数据结构的情况 - 锁在竞态条件的作用:保护临界区域,将临界区域串行化
- 内核设计的其中一个主要挑战: 避免锁冲突
- 锁的位置也很重要,要在访问/修改临界区域之前。否则会降低性能
3.代码 LOCKS
-
xv6所提供的锁:自旋锁(spinloack)和睡眠锁(sleeplocks)
-由于需要保证多处理器互斥,因此需要使用原子操作 -
在xv6中,使用汇编完成相应的原子操作,其使用了可移植的c库进行了包装
-
acquire通过将锁包装在循环中实现了锁的自旋
kernel/spinloc.h 自旋锁的结构
// Mutual exclusion lock. struct spinlock { uint locked; // Is the lock held? // For debugging: char *name; // Name of lock. struct cpu *cpu; // The cpu holding the lock. };
kernel/spinlock.c acquire
// Acquire the lock. // Loops (spins) until the lock is acquired. void acquire(struct spinlock *lk) { push_off(); // disable interrupts to avoid deadlock. if(holding(lk)) panic("acquire"); // On RISC-V, sync_lock_test_and_set turns into an atomic swap: // a5 = 1 // s1 = &lk->locked // amoswap.w.aq a5, a5, (s1) while(__sync_lock_test_and_set(&lk->locked, 1) != 0) ; // Tell the C compiler and the processor to not move loads or stores // past this point, to ensure that the critical section's memory // references happen strictly after the lock is acquired. // On RISC-V, this emits a fence instruction. __sync_synchronize(); // Record info about lock acquisition for holding() and debugging. lk->cpu = mycpu(); }
kernel/spinlock.c release
// Release the lock. void release(struct spinlock *lk) { if(!holding(lk)) panic("release"); lk->cpu = 0; // Tell the C compiler and the CPU to not move loads or stores // past this point, to ensure that all the stores in the critical // section are visible to other CPUs before the lock is released, // and that loads in the critical section occur strictly before // the lock is released. // On RISC-V, this emits a fence instruction. __sync_synchronize(); // Release the lock, equivalent to lk->locked = 0. // This code doesn't use a C assignment, since the C standard // implies that an assignment might be implemented with // multiple store instructions. // On RISC-V, sync_lock_release turns into an atomic swap: // s1 = &lk->locked // amoswap.w zero, zero, (s1) __sync_lock_release(&lk->locked); pop_off(); }
4.代码 使用锁
-
使用锁的困难:使用多少锁、再哪里使用
-
锁使用的基本原则: 保护可能被不同cpu使用(读写)的变量;锁保护不变量,即如果不变量涉及多个位置,其都需要通过一个锁来保护;
-
大内核锁:本质是对内核上锁,保证内核的顺序执行,其牺牲了效率
-
粗粒度锁的例子(xv6): kalloc.c分配器,每个进程再获得锁时候需要自旋等待
改进方案:使其存在多个空闲列表,每个列表有着自己单独的锁,允许并行分配kernel/kalloc.c kmem结构体
struct { struct spinlock lock; //自旋锁 struct run *freelist; } kmem;
-
细粒度锁的例子(xv6): 对于每一个文件都有单一锁,因此对不同文件可以不需要等待
-
改进方案:粒度的进一步细化,从而实现进程同时写入一个文件的不同区域
xv6 中的锁
锁 描述 bcache.lock 保护块缓冲区缓存项(block buffer cache entries)的分配 cons.lock 串行化对控制台硬件的访问,避免混合输出 ftable.lock 串行化文件表中文件结构体的分配 icache.lock 保护索引结点缓存项(inode cache entries)的分配 vdisk_lock 串行化对磁盘硬件和DMA描述符队列的访问 kmem.lock 串行化内存分配 log.lock 串行化事务日志操作 管道的pi->lock 串行化每个管道的操作 pid_lock 串行化next_pid的增量 进程的p->lock 串行化进程状态的改变 tickslock 串行化时钟计数操作 索引结点的 ip->lock 串行化索引结点及其内容的操作 缓冲区的b->lock 串行化每个块缓冲区的操作
5.死锁和锁排序
- 内核在代码中的路径如果需要持有数个锁,其以相同的顺序获取这些锁是很重要的 -> 避免死锁;
- 由于sleep的工作方式,因此xv6包含很多进程锁在内和锁链;全局避免死锁需要按顺序获取锁
例: 创建一个文件需要目录锁、文件inode锁、磁盘缓冲区锁、驱动程序锁和进程锁; - 锁涉及的困难: 逻辑结构的冲突、锁的未知性
死锁问题主要是来源于细粒度的锁
6.锁和中断处理函数
-
锁和中断的交互的潜在风险: 中断和锁的互相作用(如sleep和计时器中断)会形成deadlock;
-
解决方案: 如果自旋锁被中断使用,CPU保证在启用中断时永远不能持有该锁;
xv6解决方案: CPU获得任何锁时,xv6禁止该CPU上的中断;
xv6使用:acquire调用push_off (kernel/spinlock.c:89) 并且release调用pop_off (kernel/spinlock.c:100)来跟踪当前CPU上锁的嵌套级别,当计数到0时,恢复中断使能状态; -
注意: 在设置锁之前关中断是很重要的,防止中断和锁同时出现的窗口期发生;先释放锁再开中断也是相同道理。
kernel/spinlock initlock
// Mutual exclusion spin locks. #include "types.h" #include "param.h" #include "memlayout.h" #include "spinlock.h" #include "riscv.h" #include "proc.h" #include "defs.h" void initlock(struct spinlock *lk, char *name) { lk->name = name; lk->locked = 0; lk->cpu = 0; }
kernel/spinlock push_off pop_off holding
// Check whether this cpu is holding the lock. // Interrupts must be off. int holding(struct spinlock *lk) { int r; r = (lk->locked && lk->cpu == mycpu()); return r; } // push_off/pop_off are like intr_off()/intr_on() except that they are matched: // it takes two pop_off()s to undo two push_off()s. Also, if interrupts // are initially off, then push_off, pop_off leaves them off. void push_off(void) { int old = intr_get(); intr_off(); //关中断 if(mycpu()->noff == 0) mycpu()->intena = old; mycpu()->noff += 1; } void pop_off(void) { struct cpu *c = mycpu(); if(intr_get()) //检查中断状态 panic("pop_off - interruptible"); if(c->noff < 1) //计数异常 panic("pop_off"); c->noff -= 1; if(c->noff == 0 && c->intena) intr_on(); //开中断 }
7.指令和内存访问程序
- 前提:为了实现更高的性能,往往代码不是顺序执行的;
- CPU排序规则: 并发模型;
- 防止编译器对锁进行并发优化:使用 __sync_synchronize();,设置内存障碍,防止优化,强制顺序执行
8.睡眠锁
-
自旋锁缺点:不能让出CPU;
-
睡眠锁目的: 等待锁时可以让出CPU、并允许在持有锁时让步(中断);
-
睡眠锁的实现: 使用自旋锁保护的锁定自端,使其能够原子的让出cpu并释放自旋锁;
注: 由于睡眠锁存在中断使能,因此不能在自旋锁临界区域使用;反过来是可以的;
自旋锁:短临界区域;睡眠锁:冗长操作 -
使用了自旋锁和睡眠锁相结合的方式
kernel/sleeplock.h
// Long-term locks for processes struct sleeplock { uint locked; // Is the lock held? struct spinlock lk; // spinlock protecting this sleep lock // For debugging: char *name; // Name of lock. int pid; // Process holding lock };
kernel/sleeplock.c
/ Sleeping locks #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "memlayout.h" #include "spinlock.h" #include "proc.h" #include "sleeplock.h" void initsleeplock(struct sleeplock *lk, char *name) { initlock(&lk->lk, "sleep lock"); lk->name = name; lk->locked = 0; lk->pid = 0; } void acquiresleep(struct sleeplock *lk) { acquire(&lk->lk); //获取小锁 while (lk->locked) { sleep(lk, &lk->lk); //睡眠 } lk->locked = 1; 大锁 lk->pid = myproc()->pid; release(&lk->lk); //释放小锁 } void releasesleep(struct sleeplock *lk) { acquire(&lk->lk);获取小锁 lk->locked = 0; 释放大锁 lk->pid = 0; wakeup(lk); 唤醒 release(&lk->lk); //释放大锁 } int holdingsleep(struct sleeplock *lk) { int r; acquire(&lk->lk); r = lk->locked && (lk->pid == myproc()->pid); release(&lk->lk); return r; }
9.真实世界
- 在真实世界中,锁往往隐藏在更高级别的结构中,如同步队列;
- 带锁编程困难:最好使用识别竞争条件的工具,因为很容易错过锁的不变量;
- POSIX线程(Pthread):允许一个用户进程在不同的CPU上同时运行数个线程,其支持用户级锁和障碍等。其需要操作系统的支持,包括调度和同步机制;
- 无原子指令实现锁是可能的,但是其代价昂贵;
- 如果不同CPU尝试获取相同的锁,会导致昂贵的开销
-存在使用无锁的数据结构和算法,如使用链表+原子指令,但需要考虑指令和内存的重新排序
10.拓展知识
- 最基本的锁:互斥锁
要求:互斥、进步(存在机制决定进入临界区的顺序)、有限等待
常用方法:抢占式内核/非抢占式内核 - Peterson算法:谦让模型,但是存在其不能直接运行在现代硬件上、且其指令顺序会打乱
- 自旋锁的缺点:互斥,进步,但是不是有限等待
- 硬件支持:
锁:Test and Set指令、Compare and Swap指令、Fetch and Add指令
临界区:Load-Linked and Store-Conditional指令
强制更新内存:Memory Barriers内存屏障
原子变量:Atomic Variables - pthread的使用:https://zhuanlan.zhihu.com/p/353400345
- 条件变量: 保证线程的唤醒-睡眠,需要配合互斥锁使用
- 信号量:互斥锁+条件变量+全局状态变量;
- 同步问题: 有界缓冲区问题;读—写者问题;哲学家就餐问题
- 死锁成立的条件: 互斥;占有并等待;非抢占;循环等待;
二、涉及函数
Lec9
-
kernel/kernelvec.S: 见上文的对计时器中断的分析
-
kernel/plic.c: 主要通过操作内存,实现了对设备的控制,包括CPU、UART和 VIRTIO;同时也实现了对设备中断和相对应的CPU的查询
kernel/plic.c
#include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" // // the riscv Platform Level Interrupt Controller (PLIC). // void plicinit(void) { // set desired IRQ priorities non-zero (otherwise disabled). *(uint32*)(PLIC + UART0_IRQ*4) = 1; *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; } void plicinithart(void) { int hart = cpuid(); // set uart's enable bit for this hart's S-mode. *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); // set this hart's S-mode priority threshold to 0. *(uint32*)PLIC_SPRIORITY(hart) = 0; } // ask the PLIC what interrupt we should serve. int plic_claim(void) { int hart = cpuid(); int irq = *(uint32*)PLIC_SCLAIM(hart); return irq; } // tell the PLIC we've served this IRQ. void plic_complete(int irq) { int hart = cpuid(); *(uint32*)PLIC_SCLAIM(hart) = irq; }
-
kernel/console.c:主要实现了对控制台这一虚拟设备的读写操作,包括非阻塞和阻塞的,其中读函数和写函数已经解释完成了,核心就是其和相应的驱动程序(UART相互调用,实现控制台的输入输出)
kernel/console.c
// // Console input and output, to the uart. // Reads are line at a time. // Implements special input characters: // newline -- end of line // control-h -- backspace // control-u -- kill line // control-d -- end of file // control-p -- print process list // #include <stdarg.h> #include "types.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" #include "fs.h" #include "file.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" #include "proc.h" #define BACKSPACE 0x100 #define C(x) ((x)-'@') // Control-x // // send one character to the uart. // called by printf, and to echo input characters, // but not from write(). // void consputc(int c) //处理退格符号 { if(c == BACKSPACE){ // if the user typed backspace, overwrite with a space. uartputc_sync('\b'); uartputc_sync(' '); uartputc_sync('\b'); } else { uartputc_sync(c); } } struct { struct spinlock lock; // input #define INPUT_BUF 128 char buf[INPUT_BUF]; uint r; // Read index uint w; // Write index uint e; // Edit index } cons; //输入缓存 // // user write()s to the console go here. // int consolewrite(int user_src, uint64 src, int n) //循环输出字符 { int i; acquire(&cons.lock); for(i = 0; i < n; i++){ char c; if(either_copyin(&c, user_src, src+i, 1) == -1) break; uartputc(c); } release(&cons.lock); return i; } // // user read()s from the console go here. // copy (up to) a whole input line to dst. // user_dist indicates whether dst is a user // or kernel address. // int consoleread(int user_dst, uint64 dst, int n) //对整行进行数据处理 { uint target; int c; char cbuf; target = n; acquire(&cons.lock); while(n > 0){ // wait until interrupt handler has put some // input into cons.buffer. while(cons.r == cons.w){ if(myproc()->killed){ release(&cons.lock); return -1; } sleep(&cons.r, &cons.lock); } c = cons.buf[cons.r++ % INPUT_BUF]; if(c == C('D')){ // end-of-file if(n < target){ // Save ^D for next time, to make sure // caller gets a 0-byte result. cons.r--; } break; } // copy the input byte to the user-space buffer. cbuf = c; if(either_copyout(user_dst, dst, &cbuf, 1) == -1) break; dst++; --n; if(c == '\n'){ // a whole line has arrived, return to // the user-level read(). break; } } release(&cons.lock); return target - n; } // // the console input interrupt handler. // uartintr() calls this for input character. // do erase/kill processing, append to cons.buf, // wake up consoleread() if a whole line has arrived. // void consoleintr(int c) //对输入的单字符进行判定 { acquire(&cons.lock); switch(c){ case C('P'): // Print process list. procdump(); break; case C('U'): // Kill line. while(cons.e != cons.w && cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){ cons.e--; consputc(BACKSPACE); } break; case C('H'): // Backspace case '\x7f': if(cons.e != cons.w){ cons.e--; consputc(BACKSPACE); } break; default: if(c != 0 && cons.e-cons.r < INPUT_BUF){ c = (c == '\r') ? '\n' : c; // echo back to the user. consputc(c); // store for consumption by consoleread(). cons.buf[cons.e++ % INPUT_BUF] = c; if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){ // wake up consoleread() if a whole line (or end-of-file) // has arrived. cons.w = cons.e; wakeup(&cons.r); } } break; } release(&cons.lock); } void consoleinit(void) //初始化结构体和设备 { initlock(&cons.lock, "cons"); uartinit(); // connect read and write system calls // to consoleread and consolewrite. devsw[CONSOLE].read = consoleread; devsw[CONSOLE].write = consolewrite; }
-
kernel/uart.c: uart设备的驱动程序,实现设备的初始化、通过调用console相应函数实现设备的读取,同时通过被console调用实现设备的输出;核心是对设备寄存器在内存映射中的操作。所有函数上文均以给出,这里就不赘述了;
-
kernel/printf.c: 内核中printf的实现,可以类比linux中的printk;同时实现printf的初始化
核心流程:printf读取所输出字符串,根据字符选择输出类型 -> 调用相应的printint、printptr或者相应的程序对数据进行输出预处理->调用consputc通过控制台输出;kernel/printf.c
// // formatted console output -- printf, panic. // #include <stdarg.h> #include "types.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" #include "fs.h" #include "file.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" #include "proc.h" volatile int panicked = 0; // lock to avoid interleaving concurrent printf's. 自旋锁适应并发 static struct { struct spinlock lock; int locking; } pr; static char digits[] = "0123456789abcdef"; static void printint(int xx, int base, int sign) //对int,10进制和16进制的预处理 { char buf[16]; int i; uint x; if(sign && (sign = xx < 0)) x = -xx; else x = xx; i = 0; do { buf[i++] = digits[x % base]; } while((x /= base) != 0); if(sign) buf[i++] = '-'; while(--i >= 0) consputc(buf[i]); } static void printptr(uint64 x) //对地址数据的预处理 { int i; consputc('0'); consputc('x'); for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) consputc(digits[x >> (sizeof(uint64) * 8 - 4)]); } // Print to the console. only understands %d, %x, %p, %s. void printf(char *fmt, ...) { va_list ap; int i, c, locking; char *s; locking = pr.locking; if(locking) acquire(&pr.lock); if (fmt == 0) panic("null fmt"); va_start(ap, fmt); for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ if(c != '%'){ //字符分类1 consputc(c); continue; } c = fmt[++i] & 0xff; if(c == 0) break; switch(c){ //字符分类2 case 'd': printint(va_arg(ap, int), 10, 1); break; case 'x': printint(va_arg(ap, int), 16, 1); break; case 'p': printptr(va_arg(ap, uint64)); break; case 's': //对字符串的预处理 if((s = va_arg(ap, char*)) == 0) s = "(null)"; for(; *s; s++) consputc(*s); break; case '%': consputc('%'); break; default: // Print unknown % sequence to draw attention. consputc('%'); consputc(c); break; } } if(locking) release(&pr.lock); } void panic(char *s) //panic { pr.locking = 0; printf("panic: "); printf(s); printf("\n"); panicked = 1; // freeze uart output from other CPUs for(;;) ; } void printfinit(void) //初始化锁 { initlock(&pr.lock, "pr"); pr.locking = 1; }
LEC 10
- kernel/spinlock.h是锁的定义,已经给出了;
- kernel/spinlock.c 中也已经在上文中的阅读中给出了;
三、课程视频观看笔记
1.Interrupts
- 注:在真正的操作系统中,内存总是尽量被完全使用,同时,常驻内存总是比虚拟内存小的多 -> 内存的动态分配
- 中断过程: 硬件:触发中断;软件:保存工作;处理中断;恢复工作。
- 中断和trap的区别: 核心:中断是异步的,同时设备和CPU是并行的
- 外部中断in RISC-V是由外部设备引起的,其被映射到地址的相关位中:其由PLIC管理,其可以将中断交给不同的CPU来处理,如果所有核心的中断被禁用,其会保持并记录中断,直到存在cpu空闲。->内核实现
- 驱动程序: 用于管理硬件程序;其通常分为两层,及时相应部分和内核部分,其使用队列实现了并行;内核部分和上册应用程序进行交互,保证其能够返回到正确的程序。
- 设备使用内存映射I/O的方式操作设备的寄存器
- shell中的 $ ls实现流程:
放入 $ $ 到 uart->发送完成后,触发中断->发送完成;
ls:序列化字符,接受信息,产生中断->处理中断; - RISC-V对中断的支持:
中断管理位:
SIE程序中断管理器,包括E程序、S外部、T定时器;
SSTATVS:存在1位管理中断;
中断管理寄存器SIP:显示中断类型;
进入中断原因寄存器SCAUSE :显示中断原因;
STVEC:处理中断函数寄存器,保存trap处理函数; - xv6对寄存器编程:start.c初始化CPU、 main()以下的初始化对相应设备寄存器初始化,包括UART、PLIC(涉及多CPU的中断初始化和由优先级设置,xv6不启用中断优先级),最后在scheduler启用中断(所有cpu都运行调度器程序)
- 控制台输出流程修正:用户侧执行write系统调用(往往是使用prinf,经过一系列分类之后调用putc,putc调用write)
-> 系统调用sys_write(kernel/sysfile.c:82) -> 调用filewrite(kernel/file.c:135) -> 调用devsw中的write函数->
->-调用consolewrite(kernel/console.c:59);
->调用uartputc(kernel/uart.c:87)向缓冲(循环队列)输入数据,如果满休眠等待唤醒;
->调用uartstart启动传输(FIFO)注:当发送完成时候,会尝试通知休眠中的uartputc进程;
->完成传输触发中段,引起trap,最后跳转进入uartintr ->调用uartstart发送更多字符,直到传输完成。 - 中断时系统反应: > 清除SIE -> 保存pc、模式->设置为管理者模式->进入预设的trap处理程序(usertrap)
- 设备中断反应:在进入设备中断后,PLIC分配反应CPU并获得中断号码->根据中断号码处理中断。
- 并发与中断:
生产者-消费者并行性->由循环队列缓冲区实现(包括uart和consloe);
由于内核也存在中断时候,需要对内核的使能进行控制;
驱动的顶部和底部可以并行运行 ->使用锁,保证缓冲区的顺序性 - 中断的发展;,由于中断要处理的事件越来越多(硬件越来越复杂),因此为了解决超越中断处理速度的数据传输,使用轮询模式,即自旋来实现对高速数据设备的处理;复杂驱动可以在轮询和中断之间切换,以适应动态的负载。
2.Multiprocessors and locking
- 锁的需求: 多核心带来的处理并发系统调用;
- 锁的缺陷: 限制性能;
- 锁的核心: 避免竞态条件,实现串行化;
- 由于如果将锁自动化,其会的导致死板的错误,尤其是两个锁的中间;
- 锁的属性:避免丢失更新;使多步操作变为原子操作;维持不变量;
- 死锁解决:按照固定顺序取锁;
- 锁与模块化: 锁的顺序要求在一定程度上会损失模块化;
- 锁与性能: 细粒度锁:系统复杂、难以重构,但性能较好;通常的解决方案是从粗粒度到细粒度进行尝试,查看其对内存的影响;
- 以UART为例: 对于缓冲区而言 锁保证了:保护了写入的数据(缓冲区)为不变量、使读写寄存器变为原子操作、保持了寄存器不会被覆盖写入;
- 硬件解决方案:test and set实现:通过锁定地址实现,依赖于内存控制器/总线/高速缓存一致性协议等
- 内存排序: 在并发执行时候,为了防止针对与临界区域的优化,使用内存屏障防止内存对原子操作的内存年重排序;
- 带锁编程要点:非必须不使用,从粗粒度开始,善用冲突检测器;
四、完成lab及其代码
- Implement copy-on write:
声明函数与创建相应表示位
def.h
//kalloc.c
...
void* kcowalloc(void *);
void kaddrefpage(void *);
riscv.h
#define PTE_COW (1L << 8) // 1 -> this is a lazy page(cow page)
vm.c:修改相应的uvmcopy和copyout,实现内存的lazy分配,并实现相应的lazy分配函数
vm.c:
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
pte_t *pte;
uint64 pa, i;
uint flags;
// char *mem;
for(i = 0; i < sz; i += PGSIZE){
if((pte = walk(old, i, 0)) == 0)
panic("uvmcopy: pte should exist");
if((*pte & PTE_V) == 0)
panic("uvmcopy: page not present");
pa = PTE2PA(*pte);
if(*pte & PTE_W) { //cow只和可写页面有关
*pte = (*pte & ~PTE_W) | PTE_COW;
// printf("uvmcopy pte with PTE_W %p\n", *pte);
}
flags = PTE_FLAGS(*pte);
// // if((mem = kalloc()) == 0)
// // goto err;
// memmove(mem, (char*)pa, PGSIZE);
if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
// kfree(mem); //在这里不涉及内存分配,因此去掉相关代码
goto err;
}
kaddrefpage((void *) pa);
}
return 0;
err:
uvmunmap(new, 0, i / PGSIZE, 1);
return -1;
}
...
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
uint64 n, va0, pa0;
printf("into copyout\n");
while(len > 0){
if((uvmiscowpage(dstva) == 0)) {
uvmcowcopy(dstva);//检查内存是否为lay分配,并进行分配
}
va0 = PGROUNDDOWN(dstva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;
memmove((void *)(pa0 + (dstva - va0)), src, n);
len -= n;
src += n;
dstva = va0 + PGSIZE;
}
return 0;
}
...
int
uvmiscowpage(uint64 va) {
struct proc *p = myproc();
pte_t *pte;
if (va > p->sz) { //检查大小
printf("uvmiscowpage(): va over size\n");
return -1;
}
if ((pte = walk(p->pagetable, va, 0)) == 0) { //检查pte有效性
printf("uvmiscowpage(): pte is zero\n");
return -1;
}
if ((*pte & PTE_V) == 0) {//检查有效性
printf("uvmiscowpage(): page not present\n");
return -1;
}
if (*pte & PTE_COW) { //检查是否为cow页面
return 0;
}
printf("uvmiscowpage(): pte has something wrong\n");
return -1;
}
int
uvmcowcopy(uint64 va) {
pte_t *pte;
uint64 pa, new;
uint flags;
struct proc *p = myproc();
if ((pte = walk(p->pagetable, va, 0)) == 0) {
panic("uvmcowcopy: pte should exist");
}
pa = PTE2PA(*pte);
if ((new = (uint64)kcowalloc((void*)pa)) == 0) { //使用另外的函数,避免kfree的重复操作(在计数值 == 1时)
return -1;
}
flags = (PTE_FLAGS(*pte) | PTE_W) & ~PTE_COW; //调整flags
uvmunmap(p->pagetable, PGROUNDDOWN(va), 1, 0);
if (mappages(p ->pagetable, va, 1, new, flags) == -1) { //重新映射
panic("uvmcowcopy(): mappages");
}
return 0;
}
kalloc
#define PA2PGREF_ID(p) (((p)-KERNBASE)/PGSIZE)
#define PGREFMAX PA2PGREF_ID(PHYSTOP)
...
struct {
struct spinlock lock;
int acon[PGREFMAX];
} kpageref; //计数数组
#define PA2PGREF(p) kpageref.acon[PA2PGREF_ID((uint64)(p))]
...
kinit()
{
initlock(&kmem.lock, "kmem");
initlock(&kpageref.lock, "kpageref"); //初始化锁
freerange(end, (void*)PHYSTOP);
}
...
void
kfree(void *pa)
{
struct run *r;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
acquire(&kpageref.lock);
if (--PA2PGREF(pa) <= 0){ //先自减后判断,小于0释放页面
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
r = (struct run*)pa;
acquire(&kmem.lock);
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
}
release(&kpageref.lock);
}
void *
kalloc(void)
{
struct run *r;
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r){
memset((char*)r, 5, PGSIZE); // fill with junk
acquire(&kpageref.lock);
PA2PGREF(r) = 1;
release(&kpageref.lock);
}
return (void*)r;
}
// Allocate one 4096-byte page of physical memory if ref is more than one
// if ref is less then one return the original one
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void*
kcowalloc(void *pa)
{
char *newpa;
// printf("kcowalloc\n");
acquire(&kpageref.lock);
if (PA2PGREF(pa) <=1) {
release(&kpageref.lock);
return pa;
}
if((newpa = kalloc()) == 0) {
release(&kpageref.lock);
return 0;
}
memmove(newpa, (char*)pa, PGSIZE);
PA2PGREF(pa)--;
release(&kpageref.lock); //尽量减少锁影响的范围
printf("%p num of REF is %d", pa, PA2PGREF(pa));
return (void*)newpa;
}
void
kaddrefpage(void * pa) {
acquire(&kpageref.lock);
PA2PGREF(pa)++;
release(&kpageref.lock);
}
参考文献
2020版xv6手册:http://xv6.dgs.zone/tranlate_books/book-riscv-rev1/c6/s0.html
xv6手册与代码笔记:https://zhuanlan.zhihu.com/p/352699414
xv6手册中文版:http://xv6.dgs.zone/tranlate_books/book-riscv-rev1/c4/s6.html
28天速通MIT 6.S081操作系统公开课:https://zhuanlan.zhihu.com/p/627573247
MIT6.s081操作系统笔记:https://juejin.cn/post/7013843036996108325