/* * COM1201 w99 class * 1/21/99 R P Futrelle * File: mach-prob-1.cp * This has discussion and code specs and snippets * from which to build machine problem #1. * It also explains the assignment, what you are to do. */ /* History 1/19/99 Found Sedg. bug, got basic search working for tags. 1/21/99 Produced this stripped version for distribution as MP1. */ /* YOUR ASSIGNMENT, FOR MACHINE PROBLEM #1 -- DUE FRIDAY 29 JANUARY BEFORE 5pm Fill in the rest of the code you need to get the various functions working so that you can reproduce the test I show at the end of the file. You are encouraged to work with a number of smaller and simpler tests, rather than an omnibus one of the type I have below. Note that the two line file I used, of the form: blah <!-- comment test --> more blah. 012345678901234567890123456789012345679 is very handy in checking searching for segments, since you can tell from the digits in the second line exactly the position of the characters in the first line. (What character do you think is in position 39?) */ // CAUTION -- MUCH IS OMITTED BELOW. THIS CODE WILL NOT COMPILE OR RUN. // THINK OF WHAT IS HERE AS GUIDES, HINTS, OR CLUES -- THEN FILL IN WHAT'S MISSING. #include <string.h> #include <fstream.h> // to handle file I/O #include "mach-prob-1.h" // the header has declarations of variables, functions, and classes. char buffer[BUFF_MAX]; // creates a large character buffer. BUFF_MAX is a const. // do NOT use defines, that's what consts are for! struct segment {}; // has ints p1 and p2, buffer positions, and two-arg constructor // load_buffer reads a file into buffer, setting buffsize. // buffsize is number of chars in buffer, last index is buffsize - 1. // Returns false if file has too many chars to fit in buffer --and truncates file. bool load_buffer(char* filename) {} // ifstream ifile(filename); // opens file for input //while(ifile.get(ch) && buffsize != BUFF_MAX) // read in characters // this prints a piece of the buffer, a character at a time. Use a for loop. // could write an aux fn. to check to see if args are in range and upper >= lower. void print_buffer(long lower, long upper) {} // useful, for example, for counting all '<' characters. Returns number of ch's in buffer. long count_chars(char ch) // Sedgewick's brutesearch fn. modified to search buffer // Searches for string p starting in buffer at index start // His code has an off-by-one error, corrected here. // This also specifies where in the buffer the search starts. // This is important for making progress through the buffer as things are found. int brtsearch(char *p,long start); // this part of the modified code, compares buffer to pattern and corrects i-step. // if (buffer[i] != p[j]) { i -= j; j = -1; } // decrement by j, not j-1, off-by-one bug. // I wrote a variation that doesn't look at the entire buffer. Useful to stop runaway // searches for short items. Looks only length ahead. int brtsearch_limited(char* p, long start, int length); // Direct conversion of buffer segment to string object. // Uses strategy of inserting a null, '\0', in the buffer temporarily. // Example to show how it works: // cout << string_from_buffer(new segment(0, 30)) << endl; string string_from_buffer(segment* seg); /* ********************************************************** */ // Here are some scanning functions for the buffer. // Some return a (pointer to) segment, some a position, some a boolean. // Most of the functions assume that they start a particular position. long tag_start(long pos); // finds position of the next "<" starting from pos // returns true if this tag is a comment tag, must start on "<" bool comment_p(long pos); // returns segment including entire comment, must start on "<" of a known comment. // In segment returned, p1 is pos. of "<" and p2 is pos. of ">". segment* get_comment(long pos); // returns segement for tag name, must start on "<" of a tag. // tag name ends with blank or ">". // Don't use on comments! segment* tag_name(long pos); // The following code returns whichever position is nearest (least): // return(new segment(pos +1, ((bracket < blank) ? bracket : blank) - 1)); // the next two shouldn't be used on comments (why?) // true if any material besides tag name, pos must one past tag name bool tag_contents_p(long pos); // gets tag contents, minus leading space, starts one past tag name segment* tag_contents(long pos); /* Here's one test, 1/20/99, 2245h ************************** cout << "Loading small html file with comment." << endl; load_buffer("comment-test.txt"); cout << "buffsize is: " << buffsize << endl; print_buffer(0, buffsize -1); int pos = tag_start(0); cout << endl << "tag_start(0): " << pos << endl; cout << endl << "is comment? " << comment_p(pos) << endl; cout << endl << "comment_p(pos): " << pos << endl; segment* seg = get_comment(pos); cout << endl << seg->p1 << " " << seg->p2 << endl; cout << endl << endl << "Loading larger html file, also has comment." << endl; load_buffer("sample-2.html"); cout << endl << "buffsize is: " << buffsize << endl; print_buffer(0, buffsize -1); pos = tag_start(0); seg = tag_name(pos); cout << endl << "Tag segment is: " << seg->p1 << " " << seg->p2 << endl; cout << string_from_buffer(seg) << endl; pos = tag_start(6); seg = tag_name(pos); cout << endl << "Tag segment is: " << seg->p1 << " " << seg->p2 << endl; cout << string_from_buffer(seg) << endl; pos = tag_start(14); seg = tag_name(pos); cout << endl << "Tag segment is: " << seg->p1 << " " << seg->p2 << endl; cout << string_from_buffer(seg) << endl; And here is the output ******************************** Loading small html file with comment. buffsize is: 79 blah <!-- comment test --> more blah. 012345678901234567890123456789012345679 tag_start(0): 5 is comment? 1 comment_p(pos): 5 5 25 Loading larger html file, also has comment. buffsize is: 541 <html> <head> <meta http-equiv="content-type" content="text/html;charset=iso-8859-1"> <meta name="generator" content="GoLive CyberStudio 3"> <title>HTML experiment file #1</title> </head> <body> This is a simple file, sample-1.html created for anlaysis by COM1201 software tools. <!-- THIS IS A COMMENT >><<<< --> <p>By RP Futrelle, 1/17/99.</p> <p>This is a new paragraph and the last word is in <i>italics</i>.</p> <p>In this paragraph, the last word is bold <i><b>italics</b></i>.</p> <p> </body> </html> Tag segment is: 1 4 html Tag segment is: 10 13 head Tag segment is: 19 22 meta */