// parsetables.cc see license.txt for copyright and terms of use // code for parsetables.h #include "parsetables.h" // this module #include "bflatten.h" // BFlatten #include "trace.h" // traceProgress #include "crc.h" // crc32 #include "emitcode.h" // EmitCode #include "bit2d.h" // Bit2d #include // memset #include // qsort, system // array index code enum { UNASSIGNED = -1 }; using namespace sm; // fwd template void printTable(EltType const *table, int size, int rowLength, rostring typeName, rostring tableName); ParseTables::ParseTables(int t, int nt, int s, int p, StateId start, int final) { alloc(t, nt, s, p, start, final); } template void allocInitArray(T *&arr, int size, T init) { arr = new T[size]; for (int i=0; i void allocZeroArray(T *&arr, int size) { arr = new T[size]; memset(arr, 0, sizeof(arr[0]) * size); } void ParseTables::alloc(int t, int nt, int s, int p, StateId start, int final) { owning = true; temp = new TempData(s); numTerms = t; numNonterms = nt; numStates = s; numProds = p; actionCols = numTerms; actionRows = numStates; gotoCols = numNonterms; gotoRows = numStates; allocZeroArray(actionTable, actionTableSize()); allocZeroArray(gotoTable, gotoTableSize()); allocZeroArray(prodInfo, numProds); allocZeroArray(stateSymbol, numStates); // table of ambiguous actions is NULL until someone fills in the // whole thing; since we don't know how many there might be, we // can't even allocate the storage now ambigTableSize = 0; ambigTable = NULL; startState = start; finalProductionIndex = final; allocZeroArray(nontermOrder, nontermOrderSize()); if (ENABLE_CRS_COMPRESSION) { allocZeroArray(firstWithTerminal, numTerms); allocZeroArray(firstWithNonterminal, numNonterms); } else { firstWithTerminal = NULL; firstWithNonterminal = NULL; } bigProductionListSize = 0; bigProductionList = NULL; if (ENABLE_CRS_COMPRESSION) { allocZeroArray(productionsForState, numStates); } else { productionsForState = NULL; } if (ENABLE_CRS_COMPRESSION) { allocZeroArray(ambigStateTable, numStates); } else { ambigStateTable = NULL; } // # of bytes, but rounded up to nearest 32-bit boundary errorBitsRowSize = ((numTerms+31) >> 5) * 4; // no compressed info uniqueErrorRows = 0; errorBits = NULL; errorBitsPointers = NULL; actionIndexMap = NULL; actionRowPointers = NULL; gotoIndexMap = NULL; gotoRowPointers = NULL; } ParseTables::~ParseTables() { if (temp) { delete temp; } if (owning) { delete[] actionTable; delete[] gotoTable; delete[] prodInfo; delete[] stateSymbol; if (ambigTable) { delete[] ambigTable; } delete[] nontermOrder; if (firstWithTerminal) { delete[] firstWithTerminal; } if (firstWithNonterminal) { delete[] firstWithNonterminal; } if (bigProductionList) { delete[] bigProductionList; } if (errorBits) { delete[] errorBits; } if (actionIndexMap) { delete[] actionIndexMap; } if (gotoIndexMap) { delete[] gotoIndexMap; } } // these are always owned if (productionsForState) { delete[] productionsForState; } if (ambigStateTable) { delete[] ambigStateTable; } if (errorBitsPointers) { delete[] errorBitsPointers; } if (actionRowPointers) { delete[] actionRowPointers; } if (gotoRowPointers) { delete[] gotoRowPointers; } } ParseTables::TempData::TempData(int numStates) : ambigTable(), bigProductionList(), productionsForState(numStates), ambigStateTable(numStates) { productionsForState.setAll(UNASSIGNED); ambigStateTable.setAll(UNASSIGNED); } ParseTables::TempData::~TempData() {} ActionEntry ParseTables::validateAction(int code) const { // make sure that 'code' is representable; if this fails, most likely // there are more than 32k states or productions; in turn, the most // likely cause of *that* would be the grammar is being generated // automatically from some other specification; you can change the // typedefs of ActionEntry and GotoEntry in gramanl.h to get more // capacity ActionEntry ret = (ActionEntry)code; xassert((int)ret == code); return ret; } GotoEntry ParseTables::validateGoto(int code) const { // see above GotoEntry ret = (GotoEntry)code; xassert((int)ret == code); xassert(ret != errorGotoEntry); // otherwise collision with error code return ret; } // doesn't init anything; for use by emitConstructionCode's emitted code ParseTables::ParseTables(bool o) : owning(o), temp(NULL) { xassert(owning == false); } #if ENABLE_CRS_COMPRESSION ActionEntry makeAE(ActionEntryKind k, int index) { // must fit into 6 bits for my encoding if ((unsigned)index <= AE_MAXINDEX) { // ok } else { xfailure(stringc << "index " << index << " truncated!"); } if (k == AE_ERROR) { xassert(index == 0); } return k | index; } #endif ActionEntry ParseTables::encodeShift(StateId destState, int shiftedTermId) { #if ENABLE_CRS_COMPRESSION int delta = destState - firstWithTerminal[shiftedTermId]; return makeAE(AE_SHIFT, delta); #else return validateAction(+destState+1); #endif } ActionEntry ParseTables::encodeReduce(int prodId, StateId inWhatState) { #if ENABLE_CRS_COMPRESSION int begin = temp->productionsForState[inWhatState]; int end = temp->bigProductionList.length(); if (begin == UNASSIGNED) { // starting a new set of per-state productions temp->productionsForState[inWhatState] = end; temp->bigProductionList.push(prodId); return AE_REDUCE | 0 /*first in set*/; } else { // continuing a set; search for existing 'prodId' in that set int delta; for (int i=begin; ibigProductionList[i] == prodId) { // re-use this offset delta = i-begin; goto encode; } } // not found: add another production id to this set temp->bigProductionList.push(prodId); delta = end-begin; encode: return makeAE(AE_REDUCE, delta); } #else return validateAction(-prodId-1); #endif } ActionEntry ParseTables::encodeAmbig (ArrayStack const &set, StateId inWhatState) { #if ENABLE_CRS_COMPRESSION int begin = temp->ambigStateTable[inWhatState]; int end = temp->ambigTable.length(); if (begin == UNASSIGNED) { // starting a new set of per-state ambiguous actions temp->ambigStateTable[inWhatState] = end; appendAmbig(set); return makeAE(AE_AMBIGUOUS, 0 /*first in set*/); } else { // continuing a set: Look for another ambiguous action set in // the same line that has identical contents. Due to the way // sets are constructed, their representation is canonical. // This is important because some grammars (cc2) have many // ambiguous entries, but they're all the same set of actions; // were we to not consolidate like this, the 6-bit cell encoding // would not be enough. // # of big-table entries that will be used int encodeLen = set.length()+1; for (int i=begin; i+encodeLen <= end; i++) { // does this offset contain the same set of actions? if (compareAmbig(set, i)) { return makeAE(AE_AMBIGUOUS, i-begin /*delta*/); } } // no match appendAmbig(set); return makeAE(AE_AMBIGUOUS, end-begin /*delta*/); } #else int end = temp->ambigTable.length(); appendAmbig(set); return validateAction(numStates+end+1); #endif } void ParseTables::appendAmbig(ArrayStack const &set) { temp->ambigTable.push(set.length()); for (int j=0; j < set.length(); j++) { temp->ambigTable.push(set[j]); } } bool ParseTables::compareAmbig(ArrayStack const &set, int startIndex) { if (temp->ambigTable[startIndex] != set.length()) { return false; // mismatch in 1st entry } for (int j=0; j < set.length(); j++) { if (temp->ambigTable[startIndex+1+j] != set[j]) { return false; // mismatch in j+2nd entry } } return true; // match! } ActionEntry ParseTables::encodeError() const { #if ENABLE_CRS_COMPRESSION return makeAE(AE_ERROR, 0); #else return validateAction(0); #endif } GotoEntry ParseTables::encodeGoto(StateId destState, int shiftedNontermId) const { #if ENABLE_CRS_COMPRESSION xassert(0 <= shiftedNontermId && shiftedNontermId < numNonterms); int delta = destState - firstWithNonterminal[shiftedNontermId]; return validateGoto(delta); #else return validateGoto(destState); #endif } // simple alloc + copy template void copyArray(int &len, T *&dest, ArrayStack const &src) { len = src.length(); dest = new T[len]; memcpy(dest, src.getArray(), sizeof(T) * len); } // given an array 'src' of indices relative to 'base', allocate the // array 'dest' and fill it in with actual pointers into 'base' template void copyIndexPtrArray(int len, T **&dest, T *base, ArrayStack const &src) { dest = new T* [len]; for (int i=0; iambigTable); if (ENABLE_CRS_COMPRESSION) { // transfer bigProductionList copyArray(bigProductionListSize, bigProductionList, temp->bigProductionList); // transfer productionsForState, translating indices into pointers copyIndexPtrArray(numStates, productionsForState, bigProductionList, temp->productionsForState); // ambigStateTable copyIndexPtrArray(numStates, ambigStateTable, ambigTable, temp->ambigStateTable); } delete temp; temp = NULL; } // -------------------- table compression -------------------- void ParseTables::computeErrorBits() { traceProgress() << "computing errorBits[]\n"; // should only be done once xassert(!errorBits); // allocate and clear it int rowSize = ((numTerms+31) >> 5) * 4; allocZeroArray(errorBits, numStates * rowSize); // build the pointer table allocZeroArray(errorBitsPointers, numStates); // find and set the error bits fillInErrorBits(true /*setPointers*/); // compute which rows are identical; I only compress the rows (and // not the columns) because I can fold the former's compression into // the errorBitsPointers[] access, whereas the latter would require // yet another table int *compressed = new int[numStates]; // row -> new location in errorBits[] uniqueErrorRows = 0; int s; for (s=0; s < numStates; s++) { // is 's' the same as any rows that preceded it? for (int t=0; t < s; t++) { // do 's' and 't' have the same contents? if (0==memcmp(errorBitsPointers[s], errorBitsPointers[t], sizeof(ErrorBitsEntry) * errorBitsRowSize)) { // yes, map 's' to 't' instead compressed[s] = compressed[t]; goto next_s; } } // not the same as any compressed[s] = uniqueErrorRows; uniqueErrorRows++; next_s: ; } // make a smaller 'errorBits' array delete[] errorBits; allocZeroArray(errorBits, uniqueErrorRows * rowSize); // rebuild 'errorBitsPointers' according to 'compressed' for (s=0; s < numStates; s++) { errorBitsPointers[s] = errorBits + (compressed[s] * errorBitsRowSize); } delete[] compressed; // fill in the bits again, using the new pointers map fillInErrorBits(false /*setPointers*/); } void ParseTables::fillInErrorBits(bool setPointers) { for (int s=0; s < numStates; s++) { if (setPointers) { errorBitsPointers[s] = errorBits + (s * errorBitsRowSize); } for (int t=0; t < numTerms; t++) { if (isErrorAction(actionEntry((StateId)s, t))) { ErrorBitsEntry &b = errorBitsPointers[s][t >> 3]; b |= 1 << (t & 7); } } } } void ParseTables::mergeActionColumns() { traceProgress() << "merging action columns\n"; // can only do this if we've already pulled out the errors xassert(errorBits); // for now I assume we don't have a map yet xassert(!actionIndexMap); if (tracingSys("mergeActionColumnsPre")) { // print the action table before compression printTable(actionTable, actionTableSize(), actionCols, "ActionEntry", "actionTable"); } // compute graph of conflicting 'action' columns // (will be symmetric) Bit2d graph(point(numTerms, numTerms)); graph.setall(0); // fill it in for (int t1=0; t1 < numTerms; t1++) { for (int t2=0; t2 < t1; t2++) { // does column 't1' conflict with column 't2'? for (int s=0; s < numStates; s++) { ActionEntry a1 = actionEntry((StateId)s, t1); ActionEntry a2 = actionEntry((StateId)s, t2); if (isErrorAction(a1) || isErrorAction(a2) || a1 == a2) { // no problem } else { // conflict! graph.set(point(t1, t2)); graph.set(point(t2, t1)); break; } } } } // color the graph Array color(numTerms); // terminal -> color int numColors = colorTheGraph(color, graph); // build a new, compressed action table; the entries are initialized // to 'error', meaning every cell starts as don't-care ActionEntry *newTable; allocInitArray(newTable, numStates * numColors, errorActionEntry); // merge columns in 'actionTable' into those in 'newTable' // according to the 'color' map actionIndexMap = new TermIndex[numTerms]; for (int t=0; t color(numStates); // state -> color (equivalence class) int numColors = colorTheGraph(color, graph); // build a new, compressed action table ActionEntry *newTable; allocInitArray(newTable, numColors * actionCols, errorActionEntry); // merge rows in 'actionTable' into those in 'newTable' // according to the 'color' map // actionTable[]: // // t0 t1 t2 t3 // terminal equivalence classes // s0 // s1 // s2 // ... // /*states*/ // newTable[]: // // t0 t1 t2 t3 // terminal equivalence classes // c0 // c1 // c2 < e.g., union of state1 and state4 (color[1]==color[4]==2) > // ... // /*state equivalence classes (colors)*/ actionRowPointers = new ActionEntry* [numStates]; for (int s=0; s color(numNonterms); // nonterminal -> color int numColors = colorTheGraph(color, graph); // build a new, compressed goto table; the entries are initialized // to 'error', meaning every cell starts as don't-care GotoEntry *newTable; allocInitArray(newTable, numStates * numColors, encodeGotoError()); // merge columns in 'gotoTable' into those in 'newTable' // according to the 'color' map gotoIndexMap = new NtIndex[numNonterms]; for (int nt=0; nt color(numStates); // state -> color (equivalence class) int numColors = colorTheGraph(color, graph); // build a new, compressed goto table GotoEntry *newTable; allocInitArray(newTable, numColors * gotoCols, encodeGotoError()); // merge rows in 'gotoTable' into those in 'newTable' // according to the 'color' map // gotoTable[]: // // t0 t1 t2 t3 // nonterminal equivalence classes // s0 // s1 // s2 // ... // /*states*/ // newTable[]: // // t0 t1 t2 t3 // nonterminal equivalence classes // c0 // c1 // c2 < e.g., union of state1 and state4 (color[1]==color[4]==2) > // ... // /*state equivalence classes (colors)*/ gotoRowPointers = new GotoEntry* [numStates]; for (int s=0; s # of adjacent nodes Array degree(n); memset((int*)degree, 0, n * sizeof(int)); // node -> # of adjacent nodes that have colors already Array blocked(n); // initialize some arrays enum { UNASSIGNED = -1 }; { for (int i=0; i bestBlocked || // more constrained (chBlocked == bestBlocked && chUnblocked < bestUnblocked)) { // least constraining // new best best = choice; bestBlocked = chBlocked; bestUnblocked = chUnblocked; } } // get the assigned colors of the adjacent vertices Array adjColor(bestBlocked); int adjIndex = 0; for (int i=0; i usedColors) { usedColors = selColor+1; } // update 'blocked[]' for (int k=0; k void emitTable(EmitCode &out, EltType const *table, int size, int rowLength, rostring typeName, rostring tableName) { if (!table || !size) { out << " " << typeName << " *" << tableName << " = NULL;\n"; return; } bool printHex = 0==strcmp(typeName, "ErrorBitsEntry") || (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "ActionEntry")) || (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "GotoEntry")) ; bool needCast = 0==strcmp(typeName, "StateId"); if (size * sizeof(*table) > 50) { // suppress small ones out << " // storage size: " << size * sizeof(*table) << " bytes\n"; if (size % rowLength == 0) { out << " // rows: " << (size/rowLength) << " cols: " << rowLength << "\n"; } } int rowNumWidth = stringf("%d", size / rowLength /*round down*/).length(); // I make tables 'const' because that way the OS loader might be // smart enough to share them (on a read-only basis) across multiple // processes started from the same executable. But I immediately // cast them to non-const, since ParseTables doesn't declare // pointers-to-const (since it also has methods to modify the tables // at parser generation time). out << " static " << typeName << " const " << tableName << "[" << size << "] = {"; int row = 0; for (int i=0; i void emitTable2(EmitCode &out, EltType const *table, int size, int rowLength, rostring typeName, rostring tableName) { string tempName = stringc << tableName << "_static"; emitTable(out, table, size, rowLength, typeName, tempName); out << " " << tableName << " = const_cast<" << typeName << "*>(" << tempName << ");\n\n"; } template void emitOffsetTable(EmitCode &out, EltType **table, EltType *base, int size, rostring typeName, rostring tableName, rostring baseName) { if (!table) { out << " " << tableName << " = NULL;\n\n"; return; } // make the pointers persist by storing a table of offsets Array offsets(size); bool allUnassigned = true; for (int i=0; i < size; i++) { if (table[i]) { offsets[i] = table[i] - base; allUnassigned = false; } else { offsets[i] = UNASSIGNED; // codes for a NULL entry } } if (allUnassigned) { // for example, an LALR(1) grammar has no ambiguous entries in its tables size = 0; } if (size > 0) { out << " " << tableName << " = new " << typeName << " [" << size << "];\n"; emitTable(out, (int*)offsets, size, 16, "int", stringc << tableName << "_offsets"); // at run time, interpret the offsets table out << " for (int i=0; i < " << size << "; i++) {\n" << " int ofs = " << tableName << "_offsets[i];\n" << " if (ofs >= 0) {\n" << " " << tableName << "[i] = " << baseName << " + ofs;\n" << " }\n" << " else {\n" << " " << tableName << "[i] = NULL;\n" << " }\n" << " }\n\n"; } else { out << " // offset table is empty\n" << " " << tableName << " = NULL;\n\n"; } } // for debugging template void printTable(EltType const *table, int size, int rowLength, rostring typeName, rostring tableName) { // disabled for now since I don't need it anymore, and it adds // a link dependency on emitcode.cc ... #if 0 { EmitCode out("printTable.tmp"); emitTable(out, table, size, rowLength, typeName, tableName); } system("cat printTable.tmp; rm printTable.tmp"); #endif // 0 } // emit code for a function which, when compiled and executed, will // construct this same table (except the constructed table won't own // the table data, since it will point to static program data) void ParseTables::emitConstructionCode(EmitCode &out, rostring className, rostring funcName) { // must have already called 'finishTables' xassert(!temp); out << "// this makes a ParseTables from some literal data;\n" << "// the code is written by ParseTables::emitConstructionCode()\n" << "// in " << __FILE__ << "\n" << "class " << className << "_ParseTables : public ParseTables {\n" << "public:\n" << " " << className << "_ParseTables();\n" << "};\n" << "\n" << className << "_ParseTables::" << className << "_ParseTables()\n" << " : ParseTables(false /*owning*/)\n" << "{\n" ; // set all the integer-like variables #define SET_VAR(var) \ out << " " #var " = " << var << ";\n"; SET_VAR(numTerms); SET_VAR(numNonterms); SET_VAR(numStates); SET_VAR(numProds); SET_VAR(actionCols); SET_VAR(actionRows); SET_VAR(gotoCols); SET_VAR(gotoRows); SET_VAR(ambigTableSize); out << " startState = (StateId)" << (int)startState << ";\n"; SET_VAR(finalProductionIndex); SET_VAR(bigProductionListSize); SET_VAR(errorBitsRowSize); SET_VAR(uniqueErrorRows); #undef SET_VAR out << "\n"; // action table, one row per state emitTable2(out, actionTable, actionTableSize(), actionCols, "ActionEntry", "actionTable"); // goto table, one row per state emitTable2(out, gotoTable, gotoTableSize(), gotoCols, "GotoEntry", "gotoTable"); // production info, arbitrarily 16 per row emitTable2(out, prodInfo, numProds, 16, "ParseTables::ProdInfo", "prodInfo"); // state symbol map, arbitrarily 16 per row emitTable2(out, stateSymbol, numStates, 16, "SymbolId", "stateSymbol"); // ambigTable emitTable2(out, ambigTable, ambigTableSize, 16, "ActionEntry", "ambigTable"); // nonterminal order emitTable2(out, nontermOrder, nontermOrderSize(), 16, "NtIndex", "nontermOrder"); // errorBits emitTable2(out, errorBits, uniqueErrorRows * errorBitsRowSize, errorBitsRowSize, "ErrorBitsEntry", "errorBits"); emitOffsetTable(out, errorBitsPointers, errorBits, numStates, "ErrorBitsEntry*", "errorBitsPointers", "errorBits"); // actionIndexMap emitTable2(out, actionIndexMap, numTerms, 16, "TermIndex", "actionIndexMap"); // actionRowPointers emitOffsetTable(out, actionRowPointers, actionTable, numStates, "ActionEntry*", "actionRowPointers", "actionTable"); // gotoIndexMap emitTable2(out, gotoIndexMap, numNonterms, 16, "NtIndex", "gotoIndexMap"); // gotoRowPointers emitOffsetTable(out, gotoRowPointers, gotoTable, numStates, "GotoEntry*", "gotoRowPointers", "gotoTable"); if (ENABLE_CRS_COMPRESSION) { emitTable2(out, firstWithTerminal, numTerms, 16, "StateId", "firstWithTerminal"); emitTable2(out, firstWithNonterminal, numNonterms, 16, "StateId", "firstWithNonterminal"); emitTable2(out, bigProductionList, bigProductionListSize, 16, "ProdIndex", "bigProductionList"); emitOffsetTable(out, productionsForState, bigProductionList, numStates, "ProdIndex*", "productionsForState", "bigProductionList"); emitOffsetTable(out, ambigStateTable, ambigTable, numStates, "ActionEntry*", "ambigStateTable", "ambigTable"); } else { out << " firstWithTerminal = NULL;\n" << " firstWithNonterminal = NULL;\n" << " bigProductionList = NULL;\n" << " productionsForState = NULL;\n" << " ambigStateTable = NULL;\n" ; } out << "}\n" << "\n" << "\n" << "ParseTables *" << className << "::" << funcName << "()\n" << "{\n" << " return new " << className << "_ParseTables;\n" << "}\n" << "\n" ; } // EOF