jeudi 23 juin 2016

C++ scanner.h scan content between double-quotes as a token: not skipping spaces inside quotes

I'm trying to get the content between a double-quote to count as one token for an assignment.

For example: "hello world" = 1 token "hello" "world" = 3 tokens (because space counts as 1 token)

I created main.cpp and I added "scanQuotesAsString" code to 3 modules given:

  • scanner.cpp
  • scanner.h
  • scanpriv.h

Right now, "hello world" scans a 2 tokens, not skipping the space. If I add (or skipspace, then regular input such as |hello world| without quotes skips spaces as well.

I think my issue is in scanner.cpp, where the last couple functions are:

/*
* Private method: scanToEndOfIdentifier
* Usage: finish = scanToEndOfIdentifier();
* ----------------------------------------
* This function advances the position of the scanner until it
* reaches the end of a sequence of letters or digits that make
* up an identifier. The return value is the index of the last
* character in the identifier; the value of the stored index
* cp is the first character after that.
*/
int Scanner::scanToEndOfIdentifier() {
    while (cp < len && isalnum(buffer[cp])) {
        if ((stringOption == ScanQuotesAsStrings) && (buffer[cp] == '"')) 
            break;
        cp++;
    }
    return cp - 1;
}


/* Private functions */
/*
* Private method: scanQuotedString
* Usage: scanQuotedString();
* -------------------
* This function advances the position of the scanner until the
* current character is a double quotation mark
*/
void Scanner::scanQuotedString() {
    while ((cp < len && (buffer[cp] == '"')) || (cp < len && (buffer[cp] == '"'))){
        cp++;
    }

Here is main.cc

#include "genlib.h"
#include "simpio.h"
#include "scanner.h"
#include <iostream>

/* Private function prototypes */

int CountTokens(string str);

int main() {
    cout << "Please enter a sentence: ";
    string str = GetLine();

    int num = CountTokens(str);
    cout << "You entered " << num << " tokens." << endl;
    return 0;
}

int CountTokens(string str) {

    int count = 0;
    Scanner scanner;        // create new scanner object            
    scanner.setInput(str);  // initialize the input to be scanned

    //scanner.setSpaceOption(Scanner::PreserveSpaces);
    scanner.setStringOption(Scanner::ScanQuotesAsStrings);

    while (scanner.hasMoreTokens()) { // read tokens from the scanner
        scanner.nextToken();
        count++;
    }
    return count;
}

Here's scanner.cpp

/*
* File: scanner.cpp
* -----------------
* Implementation for the simplified Scanner class.
*/
#include "genlib.h"
#include "scanner.h"
#include <cctype>
#include <iostream>
/*
* The details of the representation are inaccessible to the client,
* but consist of the following fields:
*
* buffer -- String passed to setInput
* len -- Length of buffer, saved for efficiency
* cp -- Current character position in the buffer
* spaceOption -- Setting of the space option extension
*/
Scanner::Scanner() {
    buffer = "";
    spaceOption = PreserveSpaces;
}
Scanner::~Scanner() {
/* Empty */
}
void Scanner::setInput(string str) {
    buffer = str;
    len = buffer.length();
    cp = 0;
}
/*
* Implementation notes: nextToken
* -------------------------------
* The code for nextToken follows from the definition of a token.
*/
string Scanner::nextToken() {
    if (cp == -1) {
        Error("setInput has not been called");
    }
    if (stringOption == ScanQuotesAsStrings) scanQuotedString();
    if (spaceOption == IgnoreSpaces) skipSpaces();
    int start = cp;
    if (start >= len) return "";
    if (isalnum(buffer[cp])) {
        int finish = scanToEndOfIdentifier();
        return buffer.substr(start, finish - start + 1);
    }
    cp++;
    return buffer.substr(start, 1);
}

bool Scanner::hasMoreTokens() {
    if (cp == -1) {
        Error("setInput has not been called");
    }
    if (stringOption == ScanQuotesAsStrings) scanQuotedString();
    if (spaceOption == IgnoreSpaces) skipSpaces();
    return (cp < len);
}

void Scanner::setSpaceOption(spaceOptionT option) {
    spaceOption = option;
}

Scanner::spaceOptionT Scanner::getSpaceOption() {
    return spaceOption;
}

void Scanner::setStringOption(stringOptionT option) {
    stringOption = option;
}

Scanner::stringOptionT Scanner::getStringOption() {
    return stringOption;
}


/* Private functions */
/*
* Private method: skipSpaces
* Usage: skipSpaces();
* -------------------
* This function advances the position of the scanner until the
* current character is not a whitespace character.
*/
void Scanner::skipSpaces() {
    while (cp < len && isspace(buffer[cp])) {
        cp++;
    }
}

    /*
    * Private method: scanToEndOfIdentifier
    * Usage: finish = scanToEndOfIdentifier();
    * ----------------------------------------
    * This function advances the position of the scanner until it
    * reaches the end of a sequence of letters or digits that make
    * up an identifier. The return value is the index of the last
    * character in the identifier; the value of the stored index
    * cp is the first character after that.
    */
    int Scanner::scanToEndOfIdentifier() {
        while (cp < len && isalnum(buffer[cp])) {
            if ((stringOption == ScanQuotesAsStrings) && (buffer[cp] == '"')) 
                break;
            cp++;
        }
        return cp - 1;
    }


    /* Private functions */
    /*
    * Private method: scanQuotedString
    * Usage: scanQuotedString();
    * -------------------
    * This function advances the position of the scanner until the
    * current character is a double quotation mark
    */
    void Scanner::scanQuotedString() {
        while ((cp < len && (buffer[cp] == '"')) || (cp < len && (buffer[cp] == '"'))){
            cp++;
        }

scanner.h

/*
* File: scanner.h
* ---------------
* This file is the interface for a class that facilitates dividing
* a string into logical units called "tokens", which are either
*
* 1. Strings of consecutive letters and digits representing words
* 2. One-character strings representing punctuation or separators
*
* To use this class, you must first create an instance of a
* Scanner object by declaring
*
* Scanner scanner;
*
* You initialize the scanner's input stream by calling
*
* scanner.setInput(str);
*
* where str is the string from which tokens should be read.
* Once you have done so, you can then retrieve the next token
* by making the following call:
*
* token = scanner.nextToken();
*
* To determine whether any tokens remain to be read, you can call
* the predicate method scanner.hasMoreTokens(). The nextToken
* method returns the empty string after the last token is read.
*
* The following code fragment serves as an idiom for processing
* each token in the string inputString:
*
* Scanner scanner;
* scanner.setInput(inputString);
* while (scanner.hasMoreTokens()) {
* string token = scanner.nextToken();
* . . . process the token . . .
* }
*
* This version of the Scanner class includes an option for skipping
* whitespace characters, which is described in the comments for the
* setSpaceOption method.
*/
#ifndef _scanner_h
#define _scanner_h
#include "genlib.h"
/*
* Class: Scanner
* --------------
* This class is used to represent a single instance of a scanner.
*/
class Scanner {
public:
/*
* Constructor: Scanner
* Usage: Scanner scanner;
* -----------------------
* The constructor initializes a new scanner object. The scanner
* starts empty, with no input to scan.
*/
    Scanner();
/*
* Destructor: ~Scanner
* Usage: usually implicit
* -----------------------
* The destructor deallocates any memory associated with this scanner.
*/
    ~Scanner();
/*
* Method: setInput
* Usage: scanner.setInput(str);
* -----------------------------
* This method configures this scanner to start extracting
* tokens from the input string str. Any previous input string is
* discarded.
*/
    void setInput(string str);
/*
* Method: nextToken
* Usage: token = scanner.nextToken();
* -----------------------------------
* This method returns the next token from this scanner. If
* nextToken is called when no tokens are available, it returns the
* empty string.
*/
    string nextToken();
/*
* Method: hasMoreTokens
* Usage: if (scanner.hasMoreTokens()) . . .
* ------------------------------------------
* This method returns true as long as there are additional
* tokens for this scanner to read.
*/
    bool hasMoreTokens();
/*
* Methods: setSpaceOption, getSpaceOption
* Usage: scanner.setSpaceOption(option);
* option = scanner.getSpaceOption();
* ------------------------------------------
* This method controls whether this scanner
* ignores whitespace characters or treats them as valid tokens.
* By default, the nextToken function treats whitespace characters,
* such as spaces and tabs, just like any other punctuation mark.
* If, however, you call
*
* scanner.setSpaceOption(Scanner::IgnoreSpaces);
*
* the scanner will skip over any white space before reading a
* token. You can restore the original behavior by calling
*
* scanner.setSpaceOption(Scanner::PreserveSpaces);
*
* The getSpaceOption function returns the current setting
* of this option.
*/
    enum spaceOptionT { PreserveSpaces, IgnoreSpaces };
    void setSpaceOption(spaceOptionT option);
    spaceOptionT getSpaceOption();

/*
 * Methods: setStringOption, getStringOption
 * Usage: scanner.setStringOption(option);
 *        option = scanner.getStringOption();
 * --------------------------------------------------
 * This method controls how the scanner reads double quotation marks 
 * as input.  The default is set to treat quotes just like any other 
 * punctuation character: 
 *    scanner.setStringOption(Scanner::ScanQuotesAsPunctuation);
 * 
 * Otherwise, the option:
 *    scanner.setStringOption(Scanner::ScanQuotesAsStrings);
 *
 * the token starting with a quotation mark will be scanned until
 * another quotation mark is found (closing quotation). Therefore
 * the entire string within the quotation, including both quotation
 * marks counts as 1 token.
 */
    enum stringOptionT { ScanQuotesAsPunctuation, ScanQuotesAsStrings };

    void setStringOption(stringOptionT option);
    stringOptionT getStringOption();


private:

#include "scanpriv.h"
};
#endif

** and finally scanpriv.h **

/*
* File: scanpriv.h
* ----------------
* This file contains the private data for the simplified version
* of the Scanner class.
*/

/* Instance variables */
string buffer; /* The string containing the tokens */
int len; /* The buffer length, for efficiency */
int cp; /* The current index in the buffer */
spaceOptionT spaceOption; /* Setting of the space option */
stringOptionT stringOption;

/* Private method prototypes */
void skipSpaces();
int scanToEndOfIdentifier();
void scanQuotedString();

Aucun commentaire:

Enregistrer un commentaire