使用.txt和.pdf文件

时间:2017-11-29 08:56:09

标签: c++

我刚接触 C++,写了一段代码来统计文本文件中各个字母和几种特殊符号出现的次数。用 .txt 文件测试时一切正常,字母和符号的数量都统计正确;但用 .pdf 文件测试时,得到的数字不正确。

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <array>
using namespace std;

// Tallies the letters (upper and lower case share one slot, 26 slots in the
// second argument) and the characters '\n', '.', ' ' and ',' (4 slots in the
// third argument) found in the file named by the first argument.
// Returns false when that file cannot be opened.
bool lettersCounter(string, int*, int*);
// Writes the letter and sign tallies as a text report to a file whose name is
// the first argument, a space, and the second argument joined together.
bool writeToFile(string, string, int*, int*);

/**
 * Entry point: asks for a file name on standard input, counts the letters and
 * signs in that file and writes the report next to it.
 *
 * @return 0 when the report was written, 1 when the input file could not be
 *         read or the report could not be written.
 */
int main(void)
{
    const std::string outFileName = "analyzed.txt";
    int letters[26] = {0};  // one counter per letter, 'A'/'a' first
    int signs[4] = {0};     // '\n', '.', ' ', ',' in that order

    std::cout << "Please enter the file name you want to scan:" << std::endl;
    std::string inFileName;
    std::getline(std::cin, inFileName);

    if (!lettersCounter(inFileName, letters, signs))
    {
        std::cout << "File not found!" << std::endl;
        return 1;  // report the failure to the calling shell as well
    }

    // A failed write used to pass silently; tell the user and fail the run.
    if (!writeToFile(inFileName, outFileName, letters, signs))
    {
        std::cout << "Could not write the report file!" << std::endl;
        return 1;
    }

    std::cout << "Done." << std::endl;
    return 0;
}
/**
 * Counts letters and a few punctuation/whitespace characters in a file.
 *
 * The file is read exactly once, byte by byte. It is opened in binary mode so
 * that every byte is seen as stored: text-mode streams on Windows translate
 * line endings and can stop early at a Ctrl-Z (0x1A) byte, which gives wrong
 * totals for non-text input such as PDF files.
 * Note that a PDF stores its text in an encoded (usually compressed) form, so
 * these totals describe the raw bytes, not the text a PDF viewer shows.
 *
 * @param inFileName path of the file to scan.
 * @param letters    at least 26 counters; slot 0 is incremented for 'a'/'A',
 *                   slot 25 for 'z'/'Z'. Counts are added to existing values.
 * @param signs      at least 4 counters: [0] '\n', [1] '.', [2] ' ', [3] ','.
 *                   Counts are added to existing values.
 * @return false when the file cannot be opened, true otherwise.
 */
bool lettersCounter(std::string inFileName, int* letters, int* signs)
{
    std::ifstream inStream(inFileName, std::ios::binary);
    if (!inStream)
        return false;

    char c;
    // Testing the result of get() itself (instead of eof() beforehand) stops
    // the loop before a failed read, so the last byte is not counted twice.
    while (inStream.get(c))
    {
        if (c >= 'a' && c <= 'z')
        {
            ++letters[c - 'a'];
        }
        else if (c >= 'A' && c <= 'Z')
        {
            ++letters[c - 'A'];
        }
        else
        {
            switch (c)
            {
                case '\n':
                    ++signs[0];
                    break;
                case '.':
                    ++signs[1];
                    break;
                case ' ':
                    ++signs[2];
                    break;
                case ',':
                    ++signs[3];
                    break;
                default:
                    break;
            }
        }
    }
    return true;
}
/**
 * Writes the letter and sign counts as a small text report.
 *
 * The report goes to a file named inFileName + " " + outFileName. It contains
 * a header row with the letters A-Z, a row with the matching counts (each cell
 * left-aligned in 4 columns followed by one space) and one line per sign.
 *
 * @param inFileName  name of the scanned file; first part of the report name.
 * @param outFileName second part of the report name.
 * @param letters     26 counters, 'A'/'a' first.
 * @param signs       4 counters: [0] new line, [1] point, [2] space, [3] comma.
 * @return true when the report was written, false when the file could not be
 *         opened or a write failed.
 */
bool writeToFile(std::string inFileName, std::string outFileName, int* letters, int* signs)
{
    std::ofstream outStream(inFileName + " " + outFileName);
    if (!outStream)
        return false;  // nothing can be written; let the caller know

    for (char letter = 'A'; letter <= 'Z'; ++letter)
        outStream << std::setw(4) << std::left << letter << " ";
    outStream << '\n';

    for (int i = 0; i < 26; ++i)
        outStream << std::setw(4) << std::left << letters[i] << " ";
    outStream << '\n'
              << "New line: " << signs[0] << '\n'
              << "Point: " << signs[1] << '\n'
              << "Space: " << signs[2] << '\n'
              << "Comma: " << signs[3] << '\n';

    // close() flushes the buffer; a failure there (e.g. disk full) sets failbit.
    outStream.close();
    return !outStream.fail();
}

你认为这段代码哪里出了问题?

1 个答案:

答案 0 :(得分:0)

PDF 是一种二进制格式,其中的文本通常经过压缩和特殊编码,因此不能把它当作纯文本文件逐字符读取。

您可以使用 messenger document C++ 库从 PDF 文件中提取纯文本。