commit b80bbe7e6f44bb42e5510f5c4ea01d1028ba9406
parent 1dfaedda180cd821bc9c00ee28cad817b1d950da
Author: Petar Yotsev <petar@yotsev.xyz>
Date: Mon, 21 Feb 2022 19:29:15 +0000
Change chain structure to reduce RAM usage
This may come at the cost of some performance.
Diffstat:
M | main.cpp | | | 83 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
1 file changed, 67 insertions(+), 16 deletions(-)
diff --git a/main.cpp b/main.cpp
@@ -56,7 +56,7 @@ struct chain {
}
char format;
int order;
- map<string, vector<char>> ngram;
+ map<string, vector<pair<char, int>>> ngram;
vector<string> beginnings;
};
@@ -71,8 +71,8 @@ int main(int argc, char** argv)
char format = 'n';
int order = 3;
- int length = 0;
- int itterations = 1;
+ int length = 7;
+ int itterations = 10;
bool generating = true;
bool saving = false;
@@ -81,7 +81,7 @@ int main(int argc, char** argv)
bool loud = false;
//
- // processing arguments
+ // processes arguments
//
string arg;
for (int i = 1; i < argc; ++i) {
@@ -118,9 +118,11 @@ int main(int argc, char** argv)
}
}
- map<string, vector<char>> ngram;
+ map<string, vector<pair<char, int>>> ngram;
vector<string> beginnings;
+ vector<pair<char, int>>* chars;
+
//
// gets the chain in one way or another
//
@@ -136,13 +138,33 @@ int main(int argc, char** argv)
if (line.size() > order)
beginnings.push_back(line.substr(0, order));
for (int i = 1; i + order < line.size(); ++i) {
- ngram[line.substr(i, order)].push_back(line[i + order]);
+ for (auto p : ngram[line.substr(i, order)]) {
+ if (p.first == line[i + order]) {
+ p.second++;
+ goto recorded;
+ }
+ }
+ {
+ pair<char, int> p(line[i + order], 1);
+ ngram[line.substr(i, order)].push_back(p);
+ }
+ recorded:;
}
}
} else {
while (getline(file, line)) {
for (int i = 0; i + order < line.size(); ++i) {
- ngram[line.substr(i, order)].push_back(line[i + order]);
+ for (auto p : ngram[line.substr(i, order)]) {
+ if (p.first == line[i + order]) {
+ p.second++;
+ goto recorded2;
+ }
+ }
+ {
+ pair<char, int> p(line[i + order], 1);
+ ngram[line.substr(i, order)].push_back(p);
+ }
+ recorded2:;
}
}
}
@@ -192,36 +214,65 @@ int main(int argc, char** argv)
// generates text from the chain
//
for (int i = 0; i < itterations || continuous; ++i) {
- string cgram;
+
// get random starting gram
+ string cgram;
+
if (format == 's') {
do {
cgram = beginnings[rand() % beginnings.size()];
} while (ngram[cgram].size() == 0);
} else {
- std::map<string, vector<char>>::iterator it;
+ std::map<string, vector<pair<char, int>>>::iterator it;
do {
it = ngram.begin();
- for (int i = 0; i < rand() % ngram.size(); ++i) {
+ for (int i = 0; i < rand() % ngram.size(); ++i)
++it;
- }
cgram = it->first;
} while (ngram[cgram].size() == 0);
}
+
+ // print the beginning gram
string result = cgram;
cout << result;
- char next = ngram[cgram][rand() % ngram[cgram].size()];
- // generate
+ // find the next letter
+ char next;
+
+ int sum = 0;
+ for (auto p : ngram[cgram])
+ sum += p.second;
+ int index = (rand() % sum) + 1;
+ for (auto p : ngram[cgram]) {
+ sum -= p.second;
+ if (sum < index) {
+ next = p.first;
+ break;
+ }
+ }
+
for (int i = 0; i < length - order || infinite; ++i) {
- result += next;
+ // print out next letter
cout << next;
+ // get next cgram
+ result += next;
cgram = result.substr(result.length() - order, order);
- if (ngram[cgram].size() == 0) {
+ if (ngram[cgram].size() == 0)
break;
+ // find the next letter
+ sum = 0;
+ for (auto p : ngram[cgram])
+ sum += p.second;
+ index = (rand() % sum) + 1;
+ for (auto p : ngram[cgram]) {
+ sum -= p.second;
+ if (sum < index) {
+ next = p.first;
+ break;
+ }
}
- next = ngram[cgram][rand() % ngram[cgram].size()];
}
+
if (format == 's')
cout << endl;
}