/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following definitions.
- readsb
 - bzero
 - balloc
 - bfree
 - iinit
 - ialloc
 - iupdate
 - iget
 - idup
 - ilock
 - iunlock
 - iput
 - iunlockput
 - bmap
 - itrunc
 - stati
 - readi
 - writei
 - namecmp
 - dirlookup
 - dirlink
 - skipelem
 - namex
 - namei
 - nameiparent
 
   1 // File system implementation.  Five layers:
   2 //   + Blocks: allocator for raw disk blocks.
   3 //   + Log: crash recovery for multi-step updates.
   4 //   + Files: inode allocator, reading, writing, metadata.
   5 //   + Directories: inode with special contents (list of other inodes!)
   6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
   7 //
   8 // This file contains the low-level file system manipulation 
   9 // routines.  The (higher-level) system call implementations
  10 // are in sysfile.c.
  11 
  12 #include "types.h"
  13 #include "defs.h"
  14 #include "param.h"
  15 #include "stat.h"
  16 #include "mmu.h"
  17 #include "proc.h"
  18 #include "spinlock.h"
  19 #include "fs.h"
  20 #include "buf.h"
  21 #include "file.h"
  22 
  23 #define min(a, b) ((a) < (b) ? (a) : (b))
  24 static void itrunc(struct inode*);
  25 struct superblock sb;   // there should be one per dev, but we run with one dev
  26 
  27 // Read the super block.
  28 void
  29 readsb(int dev, struct superblock *sb)
  30 {
  31   struct buf *bp;
  32   
  33   bp = bread(dev, 1);
  34   memmove(sb, bp->data, sizeof(*sb));
  35   brelse(bp);
  36 }
  37 
  38 // Zero a block.
  39 static void
  40 bzero(int dev, int bno)
  41 {
  42   struct buf *bp;
  43   
  44   bp = bread(dev, bno);
  45   memset(bp->data, 0, BSIZE);
  46   log_write(bp);
  47   brelse(bp);
  48 }
  49 
  50 // Blocks. 
  51 
  52 // Allocate a zeroed disk block.
  53 static uint
  54 balloc(uint dev)
  55 {
  56   int b, bi, m;
  57   struct buf *bp;
  58 
  59   bp = 0;
  60   for(b = 0; b < sb.size; b += BPB){
  61     bp = bread(dev, BBLOCK(b, sb));
  62     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
  63       m = 1 << (bi % 8);
  64       if((bp->data[bi/8] & m) == 0){  // Is block free?
  65         bp->data[bi/8] |= m;  // Mark block in use.
  66         log_write(bp);
  67         brelse(bp);
  68         bzero(dev, b + bi);
  69         return b + bi;
  70       }
  71     }
  72     brelse(bp);
  73   }
  74   panic("balloc: out of blocks");
  75 }
  76 
  77 // Free a disk block.
  78 static void
  79 bfree(int dev, uint b)
  80 {
  81   struct buf *bp;
  82   int bi, m;
  83 
  84   readsb(dev, &sb);
  85   bp = bread(dev, BBLOCK(b, sb));
  86   bi = b % BPB;
  87   m = 1 << (bi % 8);
  88   if((bp->data[bi/8] & m) == 0)
  89     panic("freeing free block");
  90   bp->data[bi/8] &= ~m;
  91   log_write(bp);
  92   brelse(bp);
  93 }
  94 
  95 // Inodes.
  96 //
  97 // An inode describes a single unnamed file.
  98 // The inode disk structure holds metadata: the file's type,
  99 // its size, the number of links referring to it, and the
 100 // list of blocks holding the file's content.
 101 //
 102 // The inodes are laid out sequentially on disk at
 103 // sb.startinode. Each inode has a number, indicating its
 104 // position on the disk.
 105 //
 106 // The kernel keeps a cache of in-use inodes in memory
 107 // to provide a place for synchronizing access
 108 // to inodes used by multiple processes. The cached
 109 // inodes include book-keeping information that is
 110 // not stored on disk: ip->ref and ip->flags.
 111 //
 112 // An inode and its in-memory represtative go through a
 113 // sequence of states before they can be used by the
 114 // rest of the file system code.
 115 //
 116 // * Allocation: an inode is allocated if its type (on disk)
 117 //   is non-zero. ialloc() allocates, iput() frees if
 118 //   the link count has fallen to zero.
 119 //
 120 // * Referencing in cache: an entry in the inode cache
 121 //   is free if ip->ref is zero. Otherwise ip->ref tracks
 122 //   the number of in-memory pointers to the entry (open
 123 //   files and current directories). iget() to find or
 124 //   create a cache entry and increment its ref, iput()
 125 //   to decrement ref.
 126 //
 127 // * Valid: the information (type, size, &c) in an inode
 128 //   cache entry is only correct when the I_VALID bit
 129 //   is set in ip->flags. ilock() reads the inode from
 130 //   the disk and sets I_VALID, while iput() clears
 131 //   I_VALID if ip->ref has fallen to zero.
 132 //
 133 // * Locked: file system code may only examine and modify
 134 //   the information in an inode and its content if it
 135 //   has first locked the inode. The I_BUSY flag indicates
 136 //   that the inode is locked. ilock() sets I_BUSY,
 137 //   while iunlock clears it.
 138 //
 139 // Thus a typical sequence is:
 140 //   ip = iget(dev, inum)
 141 //   ilock(ip)
 142 //   ... examine and modify ip->xxx ...
 143 //   iunlock(ip)
 144 //   iput(ip)
 145 //
 146 // ilock() is separate from iget() so that system calls can
 147 // get a long-term reference to an inode (as for an open file)
 148 // and only lock it for short periods (e.g., in read()).
 149 // The separation also helps avoid deadlock and races during
 150 // pathname lookup. iget() increments ip->ref so that the inode
 151 // stays cached and pointers to it remain valid.
 152 //
 153 // Many internal file system functions expect the caller to
 154 // have locked the inodes involved; this lets callers create
 155 // multi-step atomic operations.
 156 
 157 struct {
 158   struct spinlock lock;
 159   struct inode inode[NINODE];
 160 } icache;
 161 
 162 void
 163 iinit(int dev)
 164 {
 165   initlock(&icache.lock, "icache");
 166   readsb(dev, &sb);
 167   cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d inodestart %d bmap start %d\n", sb.size,
 168           sb.nblocks, sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, sb.bmapstart);
 169 }
 170 
 171 static struct inode* iget(uint dev, uint inum);
 172 
 173 //PAGEBREAK!
 174 // Allocate a new inode with the given type on device dev.
 175 // A free inode has a type of zero.
 176 struct inode*
 177 ialloc(uint dev, short type)
 178 {
 179   int inum;
 180   struct buf *bp;
 181   struct dinode *dip;
 182 
 183   for(inum = 1; inum < sb.ninodes; inum++){
 184     bp = bread(dev, IBLOCK(inum, sb));
 185     dip = (struct dinode*)bp->data + inum%IPB;
 186     if(dip->type == 0){  // a free inode
 187       memset(dip, 0, sizeof(*dip));
 188       dip->type = type;
 189       log_write(bp);   // mark it allocated on the disk
 190       brelse(bp);
 191       return iget(dev, inum);
 192     }
 193     brelse(bp);
 194   }
 195   panic("ialloc: no inodes");
 196 }
 197 
 198 // Copy a modified in-memory inode to disk.
 199 void
 200 iupdate(struct inode *ip)
 201 {
 202   struct buf *bp;
 203   struct dinode *dip;
 204 
 205   bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 206   dip = (struct dinode*)bp->data + ip->inum%IPB;
 207   dip->type = ip->type;
 208   dip->major = ip->major;
 209   dip->minor = ip->minor;
 210   dip->nlink = ip->nlink;
 211   dip->size = ip->size;
 212   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
 213   log_write(bp);
 214   brelse(bp);
 215 }
 216 
 217 // Find the inode with number inum on device dev
 218 // and return the in-memory copy. Does not lock
 219 // the inode and does not read it from disk.
 220 static struct inode*
 221 iget(uint dev, uint inum)
 222 {
 223   struct inode *ip, *empty;
 224 
 225   acquire(&icache.lock);
 226 
 227   // Is the inode already cached?
 228   empty = 0;
 229   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
 230     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
 231       ip->ref++;
 232       release(&icache.lock);
 233       return ip;
 234     }
 235     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
 236       empty = ip;
 237   }
 238 
 239   // Recycle an inode cache entry.
 240   if(empty == 0)
 241     panic("iget: no inodes");
 242 
 243   ip = empty;
 244   ip->dev = dev;
 245   ip->inum = inum;
 246   ip->ref = 1;
 247   ip->flags = 0;
 248   release(&icache.lock);
 249 
 250   return ip;
 251 }
 252 
 253 // Increment reference count for ip.
 254 // Returns ip to enable ip = idup(ip1) idiom.
 255 struct inode*
 256 idup(struct inode *ip)
 257 {
 258   acquire(&icache.lock);
 259   ip->ref++;
 260   release(&icache.lock);
 261   return ip;
 262 }
 263 
 264 // Lock the given inode.
 265 // Reads the inode from disk if necessary.
 266 void
 267 ilock(struct inode *ip)
 268 {
 269   struct buf *bp;
 270   struct dinode *dip;
 271 
 272   if(ip == 0 || ip->ref < 1)
 273     panic("ilock");
 274 
 275   acquire(&icache.lock);
 276   while(ip->flags & I_BUSY)
 277     sleep(ip, &icache.lock);
 278   ip->flags |= I_BUSY;
 279   release(&icache.lock);
 280 
 281   if(!(ip->flags & I_VALID)){
 282     bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 283     dip = (struct dinode*)bp->data + ip->inum%IPB;
 284     ip->type = dip->type;
 285     ip->major = dip->major;
 286     ip->minor = dip->minor;
 287     ip->nlink = dip->nlink;
 288     ip->size = dip->size;
 289     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
 290     brelse(bp);
 291     ip->flags |= I_VALID;
 292     if(ip->type == 0)
 293       panic("ilock: no type");
 294   }
 295 }
 296 
 297 // Unlock the given inode.
 298 void
 299 iunlock(struct inode *ip)
 300 {
 301   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
 302     panic("iunlock");
 303 
 304   acquire(&icache.lock);
 305   ip->flags &= ~I_BUSY;
 306   wakeup(ip);
 307   release(&icache.lock);
 308 }
 309 
 310 // Drop a reference to an in-memory inode.
 311 // If that was the last reference, the inode cache entry can
 312 // be recycled.
 313 // If that was the last reference and the inode has no links
 314 // to it, free the inode (and its content) on disk.
 315 // All calls to iput() must be inside a transaction in
 316 // case it has to free the inode.
 317 void
 318 iput(struct inode *ip)
 319 {
 320   acquire(&icache.lock);
 321   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
 322     // inode has no links and no other references: truncate and free.
 323     if(ip->flags & I_BUSY)
 324       panic("iput busy");
 325     ip->flags |= I_BUSY;
 326     release(&icache.lock);
 327     itrunc(ip);
 328     ip->type = 0;
 329     iupdate(ip);
 330     acquire(&icache.lock);
 331     ip->flags = 0;
 332     wakeup(ip);
 333   }
 334   ip->ref--;
 335   release(&icache.lock);
 336 }
 337 
 338 // Common idiom: unlock, then put.
 339 void
 340 iunlockput(struct inode *ip)
 341 {
 342   iunlock(ip);
 343   iput(ip);
 344 }
 345 
 346 //PAGEBREAK!
 347 // Inode content
 348 //
 349 // The content (data) associated with each inode is stored
 350 // in blocks on the disk. The first NDIRECT block numbers
 351 // are listed in ip->addrs[].  The next NINDIRECT blocks are 
 352 // listed in block ip->addrs[NDIRECT].
 353 
 354 // Return the disk block address of the nth block in inode ip.
 355 // If there is no such block, bmap allocates one.
 356 static uint
 357 bmap(struct inode *ip, uint bn)
 358 {
 359   uint addr, *a;
 360   struct buf *bp;
 361 
 362   if(bn < NDIRECT){
 363     if((addr = ip->addrs[bn]) == 0)
 364       ip->addrs[bn] = addr = balloc(ip->dev);
 365     return addr;
 366   }
 367   bn -= NDIRECT;
 368 
 369   if(bn < NINDIRECT){
 370     // Load indirect block, allocating if necessary.
 371     if((addr = ip->addrs[NDIRECT]) == 0)
 372       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
 373     bp = bread(ip->dev, addr);
 374     a = (uint*)bp->data;
 375     if((addr = a[bn]) == 0){
 376       a[bn] = addr = balloc(ip->dev);
 377       log_write(bp);
 378     }
 379     brelse(bp);
 380     return addr;
 381   }
 382 
 383   panic("bmap: out of range");
 384 }
 385 
 386 // Truncate inode (discard contents).
 387 // Only called when the inode has no links
 388 // to it (no directory entries referring to it)
 389 // and has no in-memory reference to it (is
 390 // not an open file or current directory).
 391 static void
 392 itrunc(struct inode *ip)
 393 {
 394   int i, j;
 395   struct buf *bp;
 396   uint *a;
 397 
 398   for(i = 0; i < NDIRECT; i++){
 399     if(ip->addrs[i]){
 400       bfree(ip->dev, ip->addrs[i]);
 401       ip->addrs[i] = 0;
 402     }
 403   }
 404   
 405   if(ip->addrs[NDIRECT]){
 406     bp = bread(ip->dev, ip->addrs[NDIRECT]);
 407     a = (uint*)bp->data;
 408     for(j = 0; j < NINDIRECT; j++){
 409       if(a[j])
 410         bfree(ip->dev, a[j]);
 411     }
 412     brelse(bp);
 413     bfree(ip->dev, ip->addrs[NDIRECT]);
 414     ip->addrs[NDIRECT] = 0;
 415   }
 416 
 417   ip->size = 0;
 418   iupdate(ip);
 419 }
 420 
 421 // Copy stat information from inode.
 422 void
 423 stati(struct inode *ip, struct stat *st)
 424 {
 425   st->dev = ip->dev;
 426   st->ino = ip->inum;
 427   st->type = ip->type;
 428   st->nlink = ip->nlink;
 429   st->size = ip->size;
 430 }
 431 
 432 //PAGEBREAK!
 433 // Read data from inode.
 434 int
 435 readi(struct inode *ip, char *dst, uint off, uint n)
 436 {
 437   uint tot, m;
 438   struct buf *bp;
 439 
 440   if(ip->type == T_DEV){
 441     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
 442       return -1;
 443     return devsw[ip->major].read(ip, dst, n);
 444   }
 445 
 446   if(off > ip->size || off + n < off)
 447     return -1;
 448   if(off + n > ip->size)
 449     n = ip->size - off;
 450 
 451   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
 452     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 453     m = min(n - tot, BSIZE - off%BSIZE);
 454     memmove(dst, bp->data + off%BSIZE, m);
 455     brelse(bp);
 456   }
 457   return n;
 458 }
 459 
 460 // PAGEBREAK!
 461 // Write data to inode.
 462 int
 463 writei(struct inode *ip, char *src, uint off, uint n)
 464 {
 465   uint tot, m;
 466   struct buf *bp;
 467 
 468   if(ip->type == T_DEV){
 469     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
 470       return -1;
 471     return devsw[ip->major].write(ip, src, n);
 472   }
 473 
 474   if(off > ip->size || off + n < off)
 475     return -1;
 476   if(off + n > MAXFILE*BSIZE)
 477     return -1;
 478 
 479   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
 480     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 481     m = min(n - tot, BSIZE - off%BSIZE);
 482     memmove(bp->data + off%BSIZE, src, m);
 483     log_write(bp);
 484     brelse(bp);
 485   }
 486 
 487   if(n > 0 && off > ip->size){
 488     ip->size = off;
 489     iupdate(ip);
 490   }
 491   return n;
 492 }
 493 
 494 //PAGEBREAK!
 495 // Directories
 496 
 497 int
 498 namecmp(const char *s, const char *t)
 499 {
 500   return strncmp(s, t, DIRSIZ);
 501 }
 502 
 503 // Look for a directory entry in a directory.
 504 // If found, set *poff to byte offset of entry.
 505 struct inode*
 506 dirlookup(struct inode *dp, char *name, uint *poff)
 507 {
 508   uint off, inum;
 509   struct dirent de;
 510 
 511   if(dp->type != T_DIR)
 512     panic("dirlookup not DIR");
 513 
 514   for(off = 0; off < dp->size; off += sizeof(de)){
 515     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 516       panic("dirlink read");
 517     if(de.inum == 0)
 518       continue;
 519     if(namecmp(name, de.name) == 0){
 520       // entry matches path element
 521       if(poff)
 522         *poff = off;
 523       inum = de.inum;
 524       return iget(dp->dev, inum);
 525     }
 526   }
 527 
 528   return 0;
 529 }
 530 
 531 // Write a new directory entry (name, inum) into the directory dp.
 532 int
 533 dirlink(struct inode *dp, char *name, uint inum)
 534 {
 535   int off;
 536   struct dirent de;
 537   struct inode *ip;
 538 
 539   // Check that name is not present.
 540   if((ip = dirlookup(dp, name, 0)) != 0){
 541     iput(ip);
 542     return -1;
 543   }
 544 
 545   // Look for an empty dirent.
 546   for(off = 0; off < dp->size; off += sizeof(de)){
 547     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 548       panic("dirlink read");
 549     if(de.inum == 0)
 550       break;
 551   }
 552 
 553   strncpy(de.name, name, DIRSIZ);
 554   de.inum = inum;
 555   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 556     panic("dirlink");
 557   
 558   return 0;
 559 }
 560 
 561 //PAGEBREAK!
 562 // Paths
 563 
 564 // Copy the next path element from path into name.
 565 // Return a pointer to the element following the copied one.
 566 // The returned path has no leading slashes,
 567 // so the caller can check *path=='\0' to see if the name is the last one.
 568 // If no name to remove, return 0.
 569 //
 570 // Examples:
 571 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
 572 //   skipelem("///a//bb", name) = "bb", setting name = "a"
 573 //   skipelem("a", name) = "", setting name = "a"
 574 //   skipelem("", name) = skipelem("////", name) = 0
 575 //
 576 static char*
 577 skipelem(char *path, char *name)
 578 {
 579   char *s;
 580   int len;
 581 
 582   while(*path == '/')
 583     path++;
 584   if(*path == 0)
 585     return 0;
 586   s = path;
 587   while(*path != '/' && *path != 0)
 588     path++;
 589   len = path - s;
 590   if(len >= DIRSIZ)
 591     memmove(name, s, DIRSIZ);
 592   else {
 593     memmove(name, s, len);
 594     name[len] = 0;
 595   }
 596   while(*path == '/')
 597     path++;
 598   return path;
 599 }
 600 
 601 // Look up and return the inode for a path name.
 602 // If parent != 0, return the inode for the parent and copy the final
 603 // path element into name, which must have room for DIRSIZ bytes.
 604 // Must be called inside a transaction since it calls iput().
 605 static struct inode*
 606 namex(char *path, int nameiparent, char *name)
 607 {
 608   struct inode *ip, *next;
 609 
 610   if(*path == '/')
 611     ip = iget(ROOTDEV, ROOTINO);
 612   else
 613     ip = idup(proc->cwd);
 614 
 615   while((path = skipelem(path, name)) != 0){
 616     ilock(ip);
 617     if(ip->type != T_DIR){
 618       iunlockput(ip);
 619       return 0;
 620     }
 621     if(nameiparent && *path == '\0'){
 622       // Stop one level early.
 623       iunlock(ip);
 624       return ip;
 625     }
 626     if((next = dirlookup(ip, name, 0)) == 0){
 627       iunlockput(ip);
 628       return 0;
 629     }
 630     iunlockput(ip);
 631     ip = next;
 632   }
 633   if(nameiparent){
 634     iput(ip);
 635     return 0;
 636   }
 637   return ip;
 638 }
 639 
 640 struct inode*
 641 namei(char *path)
 642 {
 643   char name[DIRSIZ];
 644   return namex(path, 0, name);
 645 }
 646 
 647 struct inode*
 648 nameiparent(char *path, char *name)
 649 {
 650   return namex(path, 1, name);
 651 }