我想在python中使用pdfbox,我已按照https://pypi.org/project/python-pdfbox/的说明进行安装,但是当我尝试运行p = pdfbox.PDFBox()时,我收到了以下错误。
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/suyog/anaconda3/lib/python3.6/site-packages/pdfbox/__init__.py", line 81, in __init__
self.pdfbox_path = self._get_pdfbox_path()
File "/home/suyog/anaconda3/lib/python3.6/site-packages/pdfbox/__init__.py", line 57, in _get_pdfbox_path
r = urllib.request.urlopen(pdfbox_url)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/home/suyog/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
知道如何在ubuntu中使用PDFBOX吗?
答案 0 :(得分:1)
所以,现有的发行版似乎已经过时了:
我冒昧地fork了现有的仓库并实现了一个补丁。可以在here找到此包装器的可用版本。
要使用pip从我的存储库安装,请按照发布的here指示进行操作。或者,下载源并在目录中运行namespace Data_Consistency
{
/// <summary>
/// Code-behind for the Data Consistency tool window. Two browse handlers let
/// the user pick an Excel workbook and list its worksheet names in
/// <c>cb</c> / <c>cb1</c>; two apply handlers load the chosen sheet and range
/// through <c>Excelhelper.GetDataTable</c>.
/// </summary>
public partial class MainWindow : System.Windows.Window
{
    // Helper whose g_objDtInput table is replaced by both apply handlers.
    Excelhelper g_objExcelHelper = new Excelhelper();

    /// <summary>Creates the window and loads its XAML-defined components.</summary>
    public MainWindow()
    {
        InitializeComponent();
    }

    /// <summary>Placeholder handler for <c>tb</c> text changes; intentionally empty.</summary>
    private void tb_TextChanged(object sender, TextChangedEventArgs e)
    {
    }

    /// <summary>
    /// Lets the user pick the first workbook, stores its path in <c>tb</c> and
    /// fills <c>cb</c> with the workbook's worksheet names.
    /// </summary>
    private void Browse_Click(object sender, RoutedEventArgs e)
    {
        try
        {
            OpenFileDialog openFileDialog1 = new OpenFileDialog();
            openFileDialog1.InitialDirectory = @"C:\";
            openFileDialog1.Title = "Browse Text Files";
            bool? l_dialogResult = openFileDialog1.ShowDialog();
            if (!(l_dialogResult.HasValue && l_dialogResult.Value))
            {
                return;
            }

            tb.Text = openFileDialog1.FileName;
            string l_filePath = tb.Text;
            if (!IsExcelFile(l_filePath))
            {
                MessageBox.Show("please select the file.");
                return;
            }

            string l_connection = BuildSheetListConnectionString(l_filePath, "Excel 12.0;HDR=YES;IMEX=1;MAXSCANROWS=0");
            Excelhelper l_helper = new Excelhelper();
            string[] sheetnames = l_helper.GetExcelSheetNames(l_connection);

            // Drop names from a previously browsed workbook so they do not accumulate.
            cb.Items.Clear();
            foreach (string item in sheetnames)
            {
                string l_sheet = ExtractSheetName(item);
                if (l_sheet != null)
                {
                    cb.Items.Add(l_sheet);
                }
            }
        }
        catch (NullReferenceException)
        {
            // NOTE(review): the dialog result is already null-checked above, so this
            // most likely fires when the sheet-name lookup yields null; the message
            // text is kept unchanged for now - confirm and reword.
            MessageBox.Show("DialogResult Value is null", "Data Consistency Tool", MessageBoxButton.OK, MessageBoxImage.Information);
        }
    }

    /// <summary>Placeholder handler for <c>tb1</c> text changes; intentionally empty.</summary>
    private void tb1_TextChanged(object sender, TextChangedEventArgs e)
    {
    }

    /// <summary>
    /// Loads the sheet selected in <c>cb</c> (range from <c>tb1</c>) of the
    /// workbook named in <c>tb</c> into <c>g_objExcelHelper.g_objDtInput</c>.
    /// </summary>
    private void Apply_Click(object sender, RoutedEventArgs e)
    {
        string l_filePath = tb.Text;
        if (string.IsNullOrWhiteSpace(l_filePath))
        {
            // Without a path the connection string would point at nothing.
            MessageBox.Show("please select the file.");
            return;
        }

        string l_selectedSheet = cb.Text;
        string l_inputSheetRange = tb1.Text;
        string l_connectionString = BuildDataConnectionString(l_filePath);
        g_objExcelHelper.g_objDtInput = g_objExcelHelper.GetDataTable(l_connectionString, l_selectedSheet, l_inputSheetRange, g_objExcelHelper.g_objDtInput);
    }

    /// <summary>Placeholder text-changed handler; intentionally empty.</summary>
    private void TextBox_TextChanged(object sender, TextChangedEventArgs e)
    {
    }

    /// <summary>Placeholder handler for <c>tb2</c> text changes; intentionally empty.</summary>
    private void tb2_TextChanged(object sender, TextChangedEventArgs e)
    {
    }

    /// <summary>
    /// Loads the sheet selected in <c>cb1</c> (range from <c>tb3</c>) of the
    /// workbook named in <c>tb2</c> into <c>g_objExcelHelper.g_objDtInput</c>.
    /// </summary>
    private void ApplyButton_Click_2(object sender, RoutedEventArgs e)
    {
        string l_filePath = tb2.Text;
        if (string.IsNullOrWhiteSpace(l_filePath))
        {
            // Without a path the connection string would point at nothing.
            MessageBox.Show("please select the file.");
            return;
        }

        string l_selectedSheet = cb1.Text;
        string l_inputSheetRange = tb3.Text;
        string l_connectionString = BuildDataConnectionString(l_filePath);
        // NOTE(review): this overwrites the same g_objDtInput table that Apply_Click
        // fills - presumably the second workbook should go to its own table; confirm.
        g_objExcelHelper.g_objDtInput = g_objExcelHelper.GetDataTable(l_connectionString, l_selectedSheet, l_inputSheetRange, g_objExcelHelper.g_objDtInput);
    }

    /// <summary>Placeholder handler for <c>tb3</c> text changes; intentionally empty.</summary>
    private void tb3_TextChanged(object sender, TextChangedEventArgs e)
    {
    }

    /// <summary>
    /// Lets the user pick the second workbook, stores its path in <c>tb2</c> and
    /// fills <c>cb1</c> with the workbook's worksheet names.
    /// </summary>
    private void Search_Button_Click(object sender, RoutedEventArgs e)
    {
        try
        {
            OpenFileDialog openFileDialog1 = new OpenFileDialog();
            openFileDialog1.InitialDirectory = @"C:\";
            openFileDialog1.Title = "Browse Text Files";
            bool? l_dialogResult = openFileDialog1.ShowDialog();
            if (!(l_dialogResult.HasValue && l_dialogResult.Value))
            {
                return;
            }

            tb2.Text = openFileDialog1.FileName;
            string l_filePath = tb2.Text;
            if (!IsExcelFile(l_filePath))
            {
                MessageBox.Show("please select the file.");
                return;
            }

            string l_connection = BuildSheetListConnectionString(l_filePath, "Excel 12.0;HDR=YES;IMEX=1");
            Excelhelper l_helper = new Excelhelper();
            string[] sheetnames = l_helper.GetExcelSheetNames1(l_connection);

            // Drop names from a previously browsed workbook so they do not accumulate.
            cb1.Items.Clear();
            foreach (string item in sheetnames)
            {
                string l_sheet = ExtractSheetName(item);
                if (l_sheet != null)
                {
                    cb1.Items.Add(l_sheet);
                }
            }
        }
        catch (NullReferenceException)
        {
            // NOTE(review): the dialog result is already null-checked above, so this
            // most likely fires when the sheet-name lookup yields null; the message
            // text is kept unchanged for now - confirm and reword.
            MessageBox.Show("DialogResult Value is null", "Data Consistency Tool", MessageBoxButton.OK, MessageBoxImage.Information);
        }
    }

    /// <summary>
    /// Returns true when <paramref name="filePath"/> ends with
    /// <paramref name="extension"/> (leading dot included), ignoring case so
    /// that e.g. ".XLS" is treated like ".xls".
    /// </summary>
    private static bool HasExtension(string filePath, string extension)
    {
        return string.Equals(System.IO.Path.GetExtension(filePath), extension, System.StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Returns true for the workbook extensions the tool accepts: .xlsx, .xls, .xlsm.</summary>
    private static bool IsExcelFile(string filePath)
    {
        return HasExtension(filePath, ".xlsx") || HasExtension(filePath, ".xls") || HasExtension(filePath, ".xlsm");
    }

    /// <summary>
    /// Builds the OLE DB connection string used to list worksheet names.
    /// </summary>
    /// <param name="filePath">Path of the workbook.</param>
    /// <param name="modernExtendedProperties">
    /// Extended Properties value used for non-.xls workbooks.
    /// </param>
    private static string BuildSheetListConnectionString(string filePath, string modernExtendedProperties)
    {
        if (HasExtension(filePath, ".xls"))
        {
            // The legacy provider is named Microsoft.Jet.OLEDB.4.0; there is no
            // "Microsoft.ACE.OLEDB.4.0" provider to open the connection with.
            return @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filePath + ";Extended Properties='Excel 8.0;HDR=NO;IMEX=1';";
        }

        return @"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filePath + ";Extended Properties='" + modernExtendedProperties + "';";
    }

    /// <summary>
    /// Builds the OLE DB connection string used by the apply handlers to read
    /// sheet data from <paramref name="filePath"/>.
    /// </summary>
    private static string BuildDataConnectionString(string filePath)
    {
        if (HasExtension(filePath, ".xls"))
        {
            // "HDR" (header row) was previously misspelled "HRD".
            // NOTE(review): ImportMixedTypes is normally a registry setting, not a
            // connection-string keyword - confirm whether it has any effect here.
            return @"provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filePath + ";Extended Properties='Excel 8.0;HDR=NO;IMEX=0';ImportMixedTypes=Text";
        }

        return @"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filePath + ";Extended Properties='Excel 12.0;HDR=YES;IMEX=1;MAXSCANROWS=0'";
    }

    /// <summary>
    /// Converts a raw table name such as <c>Sheet1$</c> or <c>'My Sheet$'</c>
    /// into the display name (<c>Sheet1</c>, <c>My Sheet</c>).
    /// </summary>
    /// <returns>
    /// The cleaned name, or null when <paramref name="rawName"/> is null or does
    /// not end with <c>$</c> / <c>$'</c> and is therefore skipped.
    /// </returns>
    private static string ExtractSheetName(string rawName)
    {
        if (rawName == null)
        {
            return null;
        }

        bool l_isSheet = rawName.EndsWith("$'", System.StringComparison.Ordinal)
            || rawName.EndsWith("$", System.StringComparison.Ordinal);
        if (!l_isSheet)
        {
            return null;
        }

        // Cut at the LAST '$' so names that themselves contain '$' are not truncated.
        int l_end = rawName.LastIndexOf('$');
        int l_start = rawName.StartsWith("'", System.StringComparison.Ordinal) ? 1 : 0;
        return rawName.Substring(l_start, l_end - l_start);
    }
}
}
。
运行代码对我有用:
python setup.py install
答案 1 :(得分:1)
添加此答案,因为现有的回答对于初次安装此工具的人来说还不够完整。
执行pip install python-pdfbox
指向项目https://pypi.org/project/python-pdfbox/,这是预期的行为。
用法说明指示要实例化pdfbox对象,例如:p = pdfbox.PDFBox()
。
在这一点上,我们中一些寻求答案的人可能在此问题中遇到了HTTP错误。
查看存储库,请注意要下载的pdfbox版本为hardcoded。这意味着pip安装此软件包的任何人都必须足够“幸运”,才能使apache pdfbox(这是一个Java库)的版本与此版本相同。
免责声明:我试图使此功能适用于Windows 10。
程序包初始化在environment variable上查找pdfbox-app。如果找不到,它将尝试下载一个。因此是错误。
pdfbox-app-{version}.jar
。set PDFBOX=C:\Dev\pdfbox-app-2.0.11.jar
import pdfbox
p = pdfbox.PDFBox()
p.extract_text("some_filename")
注意事项:extract_text()无法识别带有空格的文件名,