我在“阅读PDF”的iPhone应用程序开发中遇到了一些问题。 我试过以下代码。我知道我使用了错误的方法进行解析 - 解析方法仅用于搜索目的。但我想将整个pdf文本转换为字符串。比如Apple的MobileHIG.pdf - 我在这段代码中使用过。
@implementation NetPDFViewController
size_t totalPages; // a variable to store total pages
// a method to get the pdf ref
CGPDFDocumentRef MyGetPDFDocumentRef (const char *filename) {
CFStringRef path;
CFURLRef url;
CGPDFDocumentRef document;
path = CFStringCreateWithCString (NULL, filename,kCFStringEncodingUTF8);
url = CFURLCreateWithFileSystemPath (NULL, path, kCFURLPOSIXPathStyle, 0);
CFRelease (path);
document = CGPDFDocumentCreateWithURL (url);// 2
CFRelease(url);
int count = CGPDFDocumentGetNumberOfPages (document);// 3
if (count == 0) {
printf("`%s' needs at least one page!", filename);
return NULL;
}
return document;
}
// table methods to parse pdf
static void op_MP (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("MP /%s\n", name);
}
static void op_DP (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("DP /%s\n", name);
}
static void op_BMC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("BMC /%s\n", name);
}
static void op_BDC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("BDC /%s\n", name);
}
static void op_EMC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("EMC /%s\n", name);
}
// a method to display pdf page.
void MyDisplayPDFPage (CGContextRef myContext,size_t pageNumber,const char *filename) {
CGPDFDocumentRef document;
CGPDFPageRef page;
document = MyGetPDFDocumentRef (filename);// 1
totalPages=CGPDFDocumentGetNumberOfPages(document);
page = CGPDFDocumentGetPage (document, pageNumber);// 2
CGPDFDictionaryRef d;
d = CGPDFPageGetDictionary(page);
// ----- edit problem here - CGPDFDictionary is completely unknown
// ----- as we don't know keys & values of it.
CGPDFScannerRef myScanner;
CGPDFOperatorTableRef myTable;
myTable = CGPDFOperatorTableCreate();
CGPDFOperatorTableSetCallback (myTable, "MP", &op_MP);
CGPDFOperatorTableSetCallback (myTable, "DP", &op_DP);
CGPDFOperatorTableSetCallback (myTable, "BMC", &op_BMC);
CGPDFOperatorTableSetCallback (myTable, "BDC", &op_BDC);
CGPDFOperatorTableSetCallback (myTable, "EMC", &op_EMC);
CGPDFContentStreamRef myContentStream = CGPDFContentStreamCreateWithPage (page);// 3
myScanner = CGPDFScannerCreate (myContentStream, myTable, NULL);// 4
CGPDFScannerScan (myScanner);// 5
// CGPDFDictionaryRef d;
CGPDFStringRef str; // represents a sequence of bytes
d = CGPDFPageGetDictionary(page);
if (CGPDFDictionaryGetString(d, "Thumb", &str)){
CFStringRef s;
s = CGPDFStringCopyTextString(str);
if (s != NULL) {
//need something in here in case it cant find anything
NSLog(@"%@ testing it", s);
}
CFRelease(s);
// CFDataRef data = CGPDFStreamCopyData (stream, CGPDFDataFormatRaw);
}
// -----------------------------------
CGContextDrawPDFPage (myContext, page);// 3
CGContextTranslateCTM(myContext, 0, 20);
CGContextScaleCTM(myContext, 1.0, -1.0);
CGPDFDocumentRelease (document);// 4
}
- (void)viewDidLoad {
[super viewDidLoad];
// --------------------------------------------------------
// code for simple direct image from pdf docs.
UIGraphicsBeginImageContext(CGSizeMake(320, 460));
initialPage=28;
MyDisplayPDFPage(UIGraphicsGetCurrentContext(), initialPage, [[[NSBundle mainBundle] pathForResource:@"MobileHIG" ofType:@"pdf"] UTF8String]);
imgV.image=UIGraphicsGetImageFromCurrentImageContext();
imgV.image=[imgV.image rotate:UIImageOrientationDownMirrored];
}
- (void)touchesBegan:(NSSet *)touches withEvent:(UIEvent *)event{
UITouch *touch = [touches anyObject];
CGPoint LasttouchPoint = [touch locationInView:self.view];
int LasttouchX = LasttouchPoint.x;
startpoint=LasttouchX;
}
- (void)touchesMoved:(NSSet *)touches withEvent:(UIEvent *)event{
}
- (void)touchesEnded:(NSSet *)touches withEvent:(UIEvent *)event{
UITouch *touch = [touches anyObject];
CGPoint LasttouchPoint = [touch locationInView:self.view];
int LasttouchX = LasttouchPoint.x;
endpoint=LasttouchX;
if(startpoint>(endpoint+75)){
initialPage++;
[self loadPage:initialPage nextOne:YES];
} else if((startpoint+75)<endpoint){
initialPage--;
[self loadPage:initialPage nextOne:NO];
}
}
-(void)loadPage:(NSUInteger)page nextOne:(BOOL)yesOrNo{
if(page<=totalPages && page>0){
UIGraphicsBeginImageContext(CGSizeMake(720, 720));
MyDisplayPDFPage(UIGraphicsGetCurrentContext(), page, [[[NSBundle mainBundle] pathForResource:@"MobileHIG" ofType:@"pdf"] UTF8String]);
CATransition *transition = [CATransition animation];
transition.duration = 0.75;
transition.timingFunction = [CAMediaTimingFunction functionWithName:kCAMediaTimingFunctionEaseInEaseOut];
transition.type=kCATransitionPush;
if(yesOrNo){
transition.subtype=kCATransitionFromRight;
} else {
transition.subtype=kCATransitionFromLeft;
}
transition.delegate = self;
[imgV.layer addAnimation:transition forKey:nil];
imgV.image=UIGraphicsGetImageFromCurrentImageContext();
imgV.image=[imgV.image rotate:UIImageOrientationDownMirrored];
}
}
但是我从pdf文档中读到一行也没有成功。 什么仍然缺少?
答案 0 :(得分:14)
如果要从pdf文件中提取某些内容,则可能需要阅读以下内容:
来自Quartz 2D编程指南。
基本上,您将使用CGPDFScanner
对象来解析内容,其工作方式如下。您注册了一些回调,这些回调将在遇到pdf流中的某些pdf运算符时由Quartz 2D自动调用。在这个初始步骤之后,您实际上开始解析pdf流。
简要介绍一下您的代码,您似乎没有按照解析通过CGPDFDocumentGetPage()
的页面的pdf内容所需的步骤进行操作。您需要先使用CGPDFOperatorTableCreate()
和CGPDFOperatorTableSetCallback()
设置回调,然后获取页面,您需要使用该页面创建内容流(使用CGPDFContentStreamCreateWithPage()
)然后实例化{{ 1}}到CGPDFScanner
并实际开始扫描CGPDFScannerCreate()
。
上述URL指出的文档的“解析PDF内容”部分为您提供了实现pdf解析所需的所有信息。
希望这有帮助。
答案 1 :(得分:5)
我有一个库,可以在这里链接这个确切的事情:Extracting pdf text in Objective C
答案 2 :(得分:4)
查看QuartzDemo示例应用程序如何执行此操作,特别是QuartzImages.h和QuartzImages.m文件中的QuartzPDFView类。它显示了通过Quartz加载PDF的示例。