diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7e04e73 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +data/corpus.txt diff --git a/data/corpus.txt.gz b/data/corpus.txt.gz new file mode 100644 index 0000000..28ac78e Binary files /dev/null and b/data/corpus.txt.gz differ diff --git a/notes/chapter1.txt b/notes/chapter1.txt index bf0dc17..0ec899f 100644 --- a/notes/chapter1.txt +++ b/notes/chapter1.txt @@ -65,4 +65,41 @@ A USet where order matters. Its interface only changes in the `find` function: * `find(x)`: find the smallest y s.t. y >= x. thereby returning a useful value even if x isn't in the set. AKA successor search. -Difference between USet and SSet \ No newline at end of file +Difference between USet and SSet: sorting requires more steps (run time) and adds +complexity. A USet should be used unless an SSet is explicitly required. + +## Mathematical background + +(See notebook). + +## The model of computation + +Proper analysis requires a mathematical model of computation. The +book uses the w-bit word-RAM model. + +* we can access cells of memory, each of which stores a w-bit word +* basic operations (arithmetic and logical) take constant time +* cells can be read or written in constant time +* the memory manager allows allocating a block of k cells of memory in O(k) + time +* size constraint: w >= log(n) where n is the number of elements stored in a + data structure +* data structures use a generic type T such that T occupies one word + +## Correctness, time complexity, and space complexity + +Three factors for analysing a data structure: + +* correctness: data structure must implement the interface +* time complexity: run times of operations on the data structure should + be as small as possible +* space complexity: the storage space used by a data structure should be + as small as possible + +Run times come in three flavours: + +1. Worst-case: an operation never takes longer than this +2. 
Amortized: if a data structure has an amortized run time of f(n), then + a sequence of m operations takes at most m f(n) time. +3. Expected: the actual run time is a random variable, and the expected + value of this run time is at most f(n). \ No newline at end of file diff --git a/src/ch01ex01.cc b/src/ch01ex01.cc new file mode 100644 index 0000000..e712d96 --- /dev/null +++ b/src/ch01ex01.cc @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +using namespace std; + +// Solve the following problems by reading a text file one line at a time +// and performing operations on each line in the appropriate data structure(s). +// Your implementations should be fast enough that even files containing +// a million lines can be processed in a few seconds. + + +// Read the input one line at a time and then write the lines out in +// reverse order, so that the last input line is printed first, then the +// second last input line, and so on. NOTE: the body is still a stub that returns immediately; `path` is not read yet. +static void +problem1(const char *path) +{ + return; +} + + +// main should just execute the problems in sequence. It requires exactly one command-line argument, which is passed to problem1 as the input path; with any other argument count it prints an error to cerr and exits with status 1. +int +main(int argc, char *argv[]) +{ + if (argc != 2) { + cerr << "No input file specified, exiting." << endl; + exit(1); + } + + problem1(argv[1]); +} \ No newline at end of file