markly

Markov chain for text generation
git clone git://git.yotsev.xyz/markly.git
Log | Files | Refs | README | LICENSE

commit b80bbe7e6f44bb42e5510f5c4ea01d1028ba9406
parent 1dfaedda180cd821bc9c00ee28cad817b1d950da
Author: Petar Yotsev <petar@yotsev.xyz>
Date:   Mon, 21 Feb 2022 19:29:15 +0000

Change chain structure to lessen RAM usage
Possibly at the cost of performance?

Diffstat:
M main.cpp | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 67 insertions(+), 16 deletions(-)

diff --git a/main.cpp b/main.cpp @@ -56,7 +56,7 @@ struct chain { } char format; int order; - map<string, vector<char>> ngram; + map<string, vector<pair<char, int>>> ngram; vector<string> beginnings; }; @@ -71,8 +71,8 @@ int main(int argc, char** argv) char format = 'n'; int order = 3; - int length = 0; - int itterations = 1; + int length = 7; + int itterations = 10; bool generating = true; bool saving = false; @@ -81,7 +81,7 @@ int main(int argc, char** argv) bool loud = false; // - // processing arguments + // processes arguments // string arg; for (int i = 1; i < argc; ++i) { @@ -118,9 +118,11 @@ int main(int argc, char** argv) } } - map<string, vector<char>> ngram; + map<string, vector<pair<char, int>>> ngram; vector<string> beginnings; + vector<pair<char, int>>* chars; + // // gets the chain in one way or another // @@ -136,13 +138,33 @@ int main(int argc, char** argv) if (line.size() > order) beginnings.push_back(line.substr(0, order)); for (int i = 1; i + order < line.size(); ++i) { - ngram[line.substr(i, order)].push_back(line[i + order]); + for (auto p : ngram[line.substr(i, order)]) { + if (p.first == line[i + order]) { + p.second++; + goto recorded; + } + } + { + pair<char, int> p(line[i + order], 1); + ngram[line.substr(i, order)].push_back(p); + } + recorded:; } } } else { while (getline(file, line)) { for (int i = 0; i + order < line.size(); ++i) { - ngram[line.substr(i, order)].push_back(line[i + order]); + for (auto p : ngram[line.substr(i, order)]) { + if (p.first == line[i + order]) { + p.second++; + goto recorded2; + } + } + { + pair<char, int> p(line[i + order], 1); + ngram[line.substr(i, order)].push_back(p); + } + recorded2:; } } } @@ -192,36 +214,65 @@ int main(int argc, char** argv) // generates text from the chain // for (int i = 0; i < itterations || continuous; ++i) { - string cgram; + // get random starting gram + string cgram; + if (format == 's') { do { cgram = beginnings[rand() % beginnings.size()]; } while (ngram[cgram].size() == 0); 
} else { - std::map<string, vector<char>>::iterator it; + std::map<string, vector<pair<char, int>>>::iterator it; do { it = ngram.begin(); - for (int i = 0; i < rand() % ngram.size(); ++i) { + for (int i = 0; i < rand() % ngram.size(); ++i) ++it; - } cgram = it->first; } while (ngram[cgram].size() == 0); } + + // print the beginning gram string result = cgram; cout << result; - char next = ngram[cgram][rand() % ngram[cgram].size()]; - // generate + // find the next letter + char next; + + int sum = 0; + for (auto p : ngram[cgram]) + sum += p.second; + int index = (rand() % sum) + 1; + for (auto p : ngram[cgram]) { + sum -= p.second; + if (sum < index) { + next = p.first; + break; + } + } + for (int i = 0; i < length - order || infinite; ++i) { - result += next; + // print out next letter cout << next; + // get next cgram + result += next; cgram = result.substr(result.length() - order, order); - if (ngram[cgram].size() == 0) { + if (ngram[cgram].size() == 0) break; + // find the next letter + sum = 0; + for (auto p : ngram[cgram]) + sum += p.second; + index = (rand() % sum) + 1; + for (auto p : ngram[cgram]) { + sum -= p.second; + if (sum < index) { + next = p.first; + break; + } } - next = ngram[cgram][rand() % ngram[cgram].size()]; } + if (format == 's') cout << endl; }