在Word / Excel中,您必须添加自定义属性。 (见图) Custom Properties。 正如你们可以看到的那样:"属性:",你可以在那里添加你想要的任何信息。 保存文件并转到文件夹中的文件位置时,可以右键单击 - >属性,您有所有选项卡:常规/安全/详细信息/以前的版本。使用该功能添加选项卡Custom。
现在我希望通过编码获得此信息:Custom Properties information。然后将其提取到记事本中。
到目前为止,我使用了SET FOREIGN_KEY_CHECKS=1;
,但后来我只获得了详细信息选项卡中的信息。我做了一些研究,并看到了Shell32
的一些可能性。但我想知道是否有可能在不安装其他DLL的情况下这样做?
到目前为止,这是我的代码DSOfile.dll
。
Shell32
提前致谢!
德苏
答案 0 :(得分:13)
历史上,这些属性由称为“结构化存储”的技术定义。第一个结构化存储实现是古老的(但仍然非常活跃)Compound File Format
之后,Microsoft添加了结构化存储功能directly into NTFS。这允许您在任何文件(甚至.txt)文件上定义作者或标题等属性。虽然Explorer UI不允许你出于某种原因再这样做,但我认为它仍然可以通过编程方式工作。
然后,在Vista中,微软重启了所有这些,并引入了所有这一切的超集:Windows Property System。
不幸的是,框架中没有所有这些的.NET API。但微软创建了一个名为Windows API CodePack的开源.NET库。因此,提取属性的最简单方法是添加对WindowsAPICodePack Core NugetPackage的引用,您可以像这样使用它:
static void Main(string[] args)
{
foreach (var prop in new ShellPropertyCollection(@"mypath\myfile"))
{
Console.WriteLine(prop.CanonicalName + "=" + prop.ValueAsObject);
}
}
如果您不想添加额外的DLL,则可以从WindowsAPICodePack源(Windows API Code Pack: Where is it?)中提取ShellPropertyCollection代码。这是一项相当重要的工作但是可行。
在您的情况下,另一种解决方案是使用旧的Structure Storage本机API。我提供了一个这样做的样本。以下是如何使用它:
static void Main(string[] args)
{
foreach (var prop in new StructuredStorage(@"mypath\myfile").Properties)
{
Console.WriteLine(prop.Name + "=" + prop.Value);
}
}
public sealed class StructuredStorage
{
public static readonly Guid SummaryInformationFormatId = new Guid("{F29F85E0-4FF9-1068-AB91-08002B27B3D9}");
public static readonly Guid DocSummaryInformationFormatId = new Guid("{D5CDD502-2E9C-101B-9397-08002B2CF9AE}");
public static readonly Guid UserDefinedPropertiesId = new Guid("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}");
private List<StructuredProperty> _properties = new List<StructuredProperty>();
public StructuredStorage(string filePath)
{
if (filePath == null)
throw new ArgumentNullException("filePath");
FilePath = filePath;
IPropertySetStorage propertySetStorage;
int hr = StgOpenStorageEx(FilePath, STGM.STGM_READ | STGM.STGM_SHARE_DENY_NONE | STGM.STGM_DIRECT_SWMR, STGFMT.STGFMT_ANY, 0, IntPtr.Zero, IntPtr.Zero, typeof(IPropertySetStorage).GUID, out propertySetStorage);
if (hr == STG_E_FILENOTFOUND || hr == STG_E_PATHNOTFOUND)
throw new FileNotFoundException(null, FilePath);
if (hr != 0)
throw new Win32Exception(hr);
try
{
LoadPropertySet(propertySetStorage, SummaryInformationFormatId);
LoadPropertySet(propertySetStorage, DocSummaryInformationFormatId);
}
finally
{
Marshal.ReleaseComObject(propertySetStorage);
}
// for some reason we can't read this one on the same COM ref?
LoadProperties(UserDefinedPropertiesId);
}
public string FilePath { get; private set; }
public IReadOnlyList<StructuredProperty> Properties
{
get
{
return _properties;
}
}
private void LoadPropertySet(IPropertySetStorage propertySetStorage, Guid fmtid)
{
IPropertyStorage propertyStorage;
int hr = propertySetStorage.Open(fmtid, STGM.STGM_READ | STGM.STGM_SHARE_EXCLUSIVE, out propertyStorage);
if (hr == STG_E_FILENOTFOUND || hr == STG_E_ACCESSDENIED)
return;
if (hr != 0)
throw new Win32Exception(hr);
IEnumSTATPROPSTG es;
propertyStorage.Enum(out es);
if (es == null)
return;
try
{
var stg = new STATPROPSTG();
int fetched;
do
{
hr = es.Next(1, ref stg, out fetched);
if (hr != 0 && hr != 1)
throw new Win32Exception(hr);
if (fetched == 1)
{
string name = GetPropertyName(fmtid, propertyStorage, stg);
var propsec = new PROPSPEC[1];
propsec[0] = new PROPSPEC();
propsec[0].ulKind = stg.lpwstrName != null ? PRSPEC.PRSPEC_LPWSTR : PRSPEC.PRSPEC_PROPID;
IntPtr lpwstr = IntPtr.Zero;
if (stg.lpwstrName != null)
{
lpwstr = Marshal.StringToCoTaskMemUni(stg.lpwstrName);
propsec[0].union.lpwstr = lpwstr;
}
else
{
propsec[0].union.propid = stg.propid;
}
var vars = new PROPVARIANT[1];
vars[0] = new PROPVARIANT();
try
{
hr = propertyStorage.ReadMultiple(1, propsec, vars);
if (hr != 0)
throw new Win32Exception(hr);
}
finally
{
if (lpwstr != IntPtr.Zero)
{
Marshal.FreeCoTaskMem(lpwstr);
}
}
object value;
try
{
switch (vars[0].vt)
{
case VARTYPE.VT_BOOL:
value = vars[0].union.boolVal != 0 ? true : false;
break;
case VARTYPE.VT_BSTR:
value = Marshal.PtrToStringUni(vars[0].union.bstrVal);
break;
case VARTYPE.VT_CY:
value = decimal.FromOACurrency(vars[0].union.cyVal);
break;
case VARTYPE.VT_DATE:
value = DateTime.FromOADate(vars[0].union.date);
break;
case VARTYPE.VT_DECIMAL:
IntPtr dec = IntPtr.Zero;
Marshal.StructureToPtr(vars[0], dec, false);
value = Marshal.PtrToStructure(dec, typeof(decimal));
break;
case VARTYPE.VT_DISPATCH:
value = Marshal.GetObjectForIUnknown(vars[0].union.pdispVal);
break;
case VARTYPE.VT_ERROR:
case VARTYPE.VT_HRESULT:
value = vars[0].union.scode;
break;
case VARTYPE.VT_FILETIME:
value = DateTime.FromFileTime(vars[0].union.filetime);
break;
case VARTYPE.VT_I1:
value = vars[0].union.cVal;
break;
case VARTYPE.VT_I2:
value = vars[0].union.iVal;
break;
case VARTYPE.VT_I4:
value = vars[0].union.lVal;
break;
case VARTYPE.VT_I8:
value = vars[0].union.hVal;
break;
case VARTYPE.VT_INT:
value = vars[0].union.intVal;
break;
case VARTYPE.VT_LPSTR:
value = Marshal.PtrToStringAnsi(vars[0].union.pszVal);
break;
case VARTYPE.VT_LPWSTR:
value = Marshal.PtrToStringUni(vars[0].union.pwszVal);
break;
case VARTYPE.VT_R4:
value = vars[0].union.fltVal;
break;
case VARTYPE.VT_R8:
value = vars[0].union.dblVal;
break;
case VARTYPE.VT_UI1:
value = vars[0].union.bVal;
break;
case VARTYPE.VT_UI2:
value = vars[0].union.uiVal;
break;
case VARTYPE.VT_UI4:
value = vars[0].union.ulVal;
break;
case VARTYPE.VT_UI8:
value = vars[0].union.uhVal;
break;
case VARTYPE.VT_UINT:
value = vars[0].union.uintVal;
break;
case VARTYPE.VT_UNKNOWN:
value = Marshal.GetObjectForIUnknown(vars[0].union.punkVal);
break;
default:
value = null;
break;
}
}
finally
{
PropVariantClear(ref vars[0]);
}
var property = new StructuredProperty(fmtid, name, stg.propid);
property.Value = value;
_properties.Add(property);
}
}
while (fetched == 1);
}
finally
{
Marshal.ReleaseComObject(es);
}
}
private static string GetPropertyName(Guid fmtid, IPropertyStorage propertyStorage, STATPROPSTG stg)
{
if (!string.IsNullOrEmpty(stg.lpwstrName))
return stg.lpwstrName;
var propids = new int[1];
propids[0] = stg.propid;
var names = new string[1];
names[0] = null;
int hr = propertyStorage.ReadPropertyNames(1, propids, names);
if (hr == 0)
return names[0];
return null;
}
public void LoadProperties(Guid formatId)
{
IPropertySetStorage propertySetStorage;
int hr = StgOpenStorageEx(FilePath, STGM.STGM_READ | STGM.STGM_SHARE_DENY_NONE | STGM.STGM_DIRECT_SWMR, STGFMT.STGFMT_ANY, 0, IntPtr.Zero, IntPtr.Zero, typeof(IPropertySetStorage).GUID, out propertySetStorage);
if (hr == STG_E_FILENOTFOUND || hr == STG_E_PATHNOTFOUND)
throw new FileNotFoundException(null, FilePath);
if (hr != 0)
throw new Win32Exception(hr);
try
{
LoadPropertySet(propertySetStorage, formatId);
}
finally
{
Marshal.ReleaseComObject(propertySetStorage);
}
}
private const int STG_E_FILENOTFOUND = unchecked((int)0x80030002);
private const int STG_E_PATHNOTFOUND = unchecked((int)0x80030003);
private const int STG_E_ACCESSDENIED = unchecked((int)0x80030005);
private enum PRSPEC
{
PRSPEC_LPWSTR = 0,
PRSPEC_PROPID = 1
}
private enum STGFMT
{
STGFMT_ANY = 4,
}
[Flags]
private enum STGM
{
STGM_READ = 0x00000000,
STGM_READWRITE = 0x00000002,
STGM_SHARE_DENY_NONE = 0x00000040,
STGM_SHARE_DENY_WRITE = 0x00000020,
STGM_SHARE_EXCLUSIVE = 0x00000010,
STGM_DIRECT_SWMR = 0x00400000
}
// we only define what we handle
private enum VARTYPE : short
{
VT_I2 = 2,
VT_I4 = 3,
VT_R4 = 4,
VT_R8 = 5,
VT_CY = 6,
VT_DATE = 7,
VT_BSTR = 8,
VT_DISPATCH = 9,
VT_ERROR = 10,
VT_BOOL = 11,
VT_UNKNOWN = 13,
VT_DECIMAL = 14,
VT_I1 = 16,
VT_UI1 = 17,
VT_UI2 = 18,
VT_UI4 = 19,
VT_I8 = 20,
VT_UI8 = 21,
VT_INT = 22,
VT_UINT = 23,
VT_HRESULT = 25,
VT_LPSTR = 30,
VT_LPWSTR = 31,
VT_FILETIME = 64,
}
[StructLayout(LayoutKind.Explicit)]
private struct PROPVARIANTunion
{
[FieldOffset(0)]
public sbyte cVal;
[FieldOffset(0)]
public byte bVal;
[FieldOffset(0)]
public short iVal;
[FieldOffset(0)]
public ushort uiVal;
[FieldOffset(0)]
public int lVal;
[FieldOffset(0)]
public uint ulVal;
[FieldOffset(0)]
public int intVal;
[FieldOffset(0)]
public uint uintVal;
[FieldOffset(0)]
public long hVal;
[FieldOffset(0)]
public ulong uhVal;
[FieldOffset(0)]
public float fltVal;
[FieldOffset(0)]
public double dblVal;
[FieldOffset(0)]
public short boolVal;
[FieldOffset(0)]
public int scode;
[FieldOffset(0)]
public long cyVal;
[FieldOffset(0)]
public double date;
[FieldOffset(0)]
public long filetime;
[FieldOffset(0)]
public IntPtr bstrVal;
[FieldOffset(0)]
public IntPtr pszVal;
[FieldOffset(0)]
public IntPtr pwszVal;
[FieldOffset(0)]
public IntPtr punkVal;
[FieldOffset(0)]
public IntPtr pdispVal;
}
[StructLayout(LayoutKind.Sequential)]
private struct PROPSPEC
{
public PRSPEC ulKind;
public PROPSPECunion union;
}
[StructLayout(LayoutKind.Explicit)]
private struct PROPSPECunion
{
[FieldOffset(0)]
public int propid;
[FieldOffset(0)]
public IntPtr lpwstr;
}
[StructLayout(LayoutKind.Sequential)]
private struct PROPVARIANT
{
public VARTYPE vt;
public ushort wReserved1;
public ushort wReserved2;
public ushort wReserved3;
public PROPVARIANTunion union;
}
[StructLayout(LayoutKind.Sequential)]
private struct STATPROPSTG
{
[MarshalAs(UnmanagedType.LPWStr)]
public string lpwstrName;
public int propid;
public VARTYPE vt;
}
[StructLayout(LayoutKind.Sequential)]
private struct STATPROPSETSTG
{
public Guid fmtid;
public Guid clsid;
public uint grfFlags;
public System.Runtime.InteropServices.ComTypes.FILETIME mtime;
public System.Runtime.InteropServices.ComTypes.FILETIME ctime;
public System.Runtime.InteropServices.ComTypes.FILETIME atime;
public uint dwOSVersion;
}
[DllImport("ole32.dll")]
private static extern int StgOpenStorageEx([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, STGM grfMode, STGFMT stgfmt, int grfAttrs, IntPtr pStgOptions, IntPtr reserved2, [MarshalAs(UnmanagedType.LPStruct)] Guid riid, out IPropertySetStorage ppObjectOpen);
[DllImport("ole32.dll")]
private static extern int PropVariantClear(ref PROPVARIANT pvar);
[Guid("0000013B-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
private interface IEnumSTATPROPSETSTG
{
[PreserveSig]
int Next(int celt, ref STATPROPSETSTG rgelt, out int pceltFetched);
// rest ommited
}
[Guid("00000139-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
private interface IEnumSTATPROPSTG
{
[PreserveSig]
int Next(int celt, ref STATPROPSTG rgelt, out int pceltFetched);
// rest ommited
}
[Guid("00000138-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
private interface IPropertyStorage
{
[PreserveSig]
int ReadMultiple(uint cpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPSPEC[] rgpspec, [Out, MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPVARIANT[] rgpropvar);
[PreserveSig]
int WriteMultiple(uint cpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPSPEC[] rgpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPVARIANT[] rgpropvar, uint propidNameFirst);
[PreserveSig]
int DeleteMultiple(uint cpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPSPEC[] rgpspec);
[PreserveSig]
int ReadPropertyNames(uint cpropid, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] int[] rgpropid, [Out, MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.LPWStr, SizeParamIndex = 0)] string[] rglpwstrName);
[PreserveSig]
int NotDeclared1();
[PreserveSig]
int NotDeclared2();
[PreserveSig]
int Commit(uint grfCommitFlags);
[PreserveSig]
int NotDeclared3();
[PreserveSig]
int Enum(out IEnumSTATPROPSTG ppenum);
// rest ommited
}
[Guid("0000013A-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
private interface IPropertySetStorage
{
[PreserveSig]
int Create([MarshalAs(UnmanagedType.LPStruct)] Guid rfmtid, [MarshalAs(UnmanagedType.LPStruct)] Guid pclsid, uint grfFlags, STGM grfMode, out IPropertyStorage ppprstg);
[PreserveSig]
int Open([MarshalAs(UnmanagedType.LPStruct)] Guid rfmtid, STGM grfMode, out IPropertyStorage ppprstg);
[PreserveSig]
int NotDeclared3();
[PreserveSig]
int Enum(out IEnumSTATPROPSETSTG ppenum);
}
}
public sealed class StructuredProperty
{
public StructuredProperty(Guid formatId, string name, int id)
{
FormatId = formatId;
Name = name;
Id = id;
}
public Guid FormatId { get; private set; }
public string Name { get; private set; }
public int Id { get; private set; }
public object Value { get; set; }
public override string ToString()
{
return Name;
}
}
答案 1 :(得分:2)
您可以尝试NPOI引擎从不同的Office文件(doc,xls,xlsx,docx等)中提取属性。此组件没有任何第三方依赖项,您不需要Office来使用它。
但是,这个库有点棘手,因为您需要为不同类型的文件使用不同类型的属性提取器。可以在官方Git Hub存储库TestHPSFPropertiesExtractor中找到好的代码示例。
可以找到NuGet包here。
答案 2 :(得分:1)
您可以使用一组名为NetOffice的NuGet包。每个Office应用程序都有一些软件包,还有一些核心程序集。获取NetOffice.Word和NetOffice.Excel并将其安装在您的解决方案中。 Codeplex site上有一些文档,但我必须浏览源代码才能真正理解发生了什么。这是一个示例程序:
using NetOffice.OfficeApi;
using System;
namespace Office_Doc_Reader
{
class Program
{
static void Main(string[] args)
{
using (var wordApp = new NetOffice.WordApi.Application())
using (var excelApp = new NetOffice.ExcelApi.Application())
{
var doc = wordApp.Documents.Open("C:\\Users\\John\\Desktop\\test.docx");
var xls = excelApp.Workbooks.Open("C:\\Users\\John\\Desktop\\test.xlsx");
var customProperties = (DocumentProperties)doc.CustomDocumentProperties;
foreach (var property in customProperties)
{
Console.WriteLine(String.Format("Name: {0}, Value: {1}, Type: {2}", property.Name, property.Value, property.Type));
}
customProperties = (DocumentProperties)xls.CustomDocumentProperties;
foreach (var property in customProperties)
{
Console.WriteLine(String.Format("Name: {0}, Value: {1}, Type: {2}", property.Name, property.Value, property.Type));
}
}
Console.ReadKey();
}
}
}
显示以下结果:
Name: Custom prop 1, Value: Text Value, Type: msoPropertyTypeString
Name: Custom prop 2, Value: 2/21/2016 12:00:00 AM, Type: msoPropertyTypeDate
Name: Custom prop 3, Value: 42, Type: msoPropertyTypeNumber
Name: Custom prop 4, Value: True, Type: msoPropertyTypeBoolean
Name: Foo, Value: abc, Type: msoPropertyTypeString
Name: Bar, Value: 1/1/1970 12:00:00 AM, Type: msoPropertyTypeDate
Name: Baz, Value: 3.14159, Type: msoPropertyTypeFloat
Name: Qux, Value: False, Type: msoPropertyTypeBoolean
对于具有以下属性的Word和Excel文件:
我根本没有这么多工作,所以我不能比这更深入。希望它有所帮助。
答案 3 :(得分:1)
您可以使用Microsoft的OpenXml SDK。我相信它能够从word / excel文件中获取任何信息。 看看https://msdn.microsoft.com/en-us/library/office/hh674468.aspx这个https://msdn.microsoft.com/en-us/library/office/bb448854.aspx 要么 http://openxmldeveloper.org/
答案 4 :(得分:0)
可能有更通用的方法。 如果您的来源是... x文件,则可以通过XML查询提取文档。 如果您解压缩文件(例如7-Zip),则会在子文件夹docProps中找到名为custom.xml的文件。 这些属性可以在XML文件中轻松找到
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="SAPDOKNR">
<vt:lpwstr>10002210058</vt:lpwstr>
</property>
...