在Java中提取元数据

时间:2014-07-17 07:39:56

标签: java metadata jpeg

如何使用Java仅从JPEG图像中提取元数据?我尝试了"Read Image Metadata from single file with Java"这个问题中给出的代码:
import org.w3c.dom.*;

import java.io.*;
import java.util.*;
import javax.imageio.*;
import javax.imageio.stream.*;
import javax.imageio.metadata.*;

/**
 * Small command-line utility that dumps, as indented XML-like text on standard output,
 * every metadata tree that the Image I/O reader exposes for the first image in a file.
 */
public class Metadata {

    /** Image inspected when no path is supplied on the command line. */
    private static final String DEFAULT_FILE = "C:\\Users\\Public\\Pictures\\download.jpg";

    /** Indentation emitted once per nesting level. */
    private static final String INDENT = "    ";

    /**
     * Entry point. Dumps the metadata of the file named by the first argument, or of
     * {@link #DEFAULT_FILE} when no argument is given.
     *
     * @param args optional; {@code args[0]} is the path of the image to inspect
     */
    public static void main(String[] args) {
        Metadata meta = new Metadata();
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        if (new File(filename).exists()) {
            meta.readAndDisplayMetadata(filename);
        } else {
            System.out.println("cannot find file: " + filename);
        }
    }

    /**
     * Reads the metadata of the first image in {@code fileName} and prints one tree per
     * metadata format name reported by the reader.
     *
     * <p>Failures are reported (a message on standard output, or a stack trace for
     * exceptions) rather than propagated. The input stream is always closed and the
     * reader always disposed.
     *
     * @param fileName path of the image file to inspect
     */
    void readAndDisplayMetadata(String fileName) {
        File file = new File(fileName);
        // try-with-resources tolerates a null resource, so the null check can live inside.
        try (ImageInputStream iis = ImageIO.createImageInputStream(file)) {
            if (iis == null) {
                // No stream could be created; getImageReaders would reject a null input.
                System.out.println("cannot open image stream: " + fileName);
                return;
            }

            Iterator<ImageReader> readers = ImageIO.getImageReaders(iis);
            if (!readers.hasNext()) {
                System.out.println("no image reader found for: " + fileName);
                return;
            }

            // pick the first available ImageReader
            ImageReader reader = readers.next();
            try {
                // attach source to the reader (second argument: seek-forward-only)
                reader.setInput(iis, true);

                // read metadata of first image
                IIOMetadata metadata = reader.getImageMetadata(0);
                if (metadata == null) {
                    System.out.println("no metadata available for: " + fileName);
                    return;
                }

                for (String name : metadata.getMetadataFormatNames()) {
                    System.out.println("Format name: " + name);
                    displayMetadata(metadata.getAsTree(name));
                }
            } finally {
                reader.dispose();
            }
        } catch (Exception e) {
            // Top-level boundary of this tool: report the failure and keep going.
            e.printStackTrace();
        }
    }

    /**
     * Prints the tree rooted at {@code root}, starting at indentation level 0.
     *
     * @param root root node of the tree to print
     */
    void displayMetadata(Node root) {
        displayMetadata(root, 0);
    }

    /**
     * Prints the indentation for the given nesting level, without a line break.
     *
     * @param level nesting depth; values of zero or less print nothing
     */
    void indent(int level) {
        for (int i = 0; i < level; i++) {
            System.out.print(INDENT);
        }
    }

    /**
     * Recursively prints {@code node} and its descendants as indented XML-like text.
     * A node without children is printed as a self-closing tag.
     *
     * @param node  node to print
     * @param level nesting depth of {@code node}, used for indentation
     */
    void displayMetadata(Node node, int level) {
        // print open tag of element
        indent(level);
        System.out.print("<" + node.getNodeName());

        NamedNodeMap map = node.getAttributes();
        if (map != null) {
            // print attribute values
            int length = map.getLength();
            for (int i = 0; i < length; i++) {
                Node attr = map.item(i);
                System.out.print(" " + attr.getNodeName()
                        + "=\"" + attr.getNodeValue() + "\"");
            }
        }

        Node child = node.getFirstChild();
        if (child == null) {
            // no children, so close element and return
            System.out.println("/>");
            return;
        }

        // children, so close current tag
        System.out.println(">");
        while (child != null) {
            // print children recursively
            displayMetadata(child, level + 1);
            child = child.getNextSibling();
        }

        // print close tag of element
        indent(level);
        System.out.println("</" + node.getNodeName() + ">");
    }
}

这是我使用的代码,但得到的输出有点奇怪。我想提取的元数据包括:文件名、文件类型、MIME类型、尺寸、编码过程、每个样本的位数、颜色通道数、文件大小、JFIF版本、分辨率单位、X和Y分辨率,以及MD5和SHA1哈希值。

输出:

Format name: javax_imageio_jpeg_image_1.0
<javax_imageio_jpeg_image_1.0>
<JPEGvariety>
    <app0JFIF majorVersion="1" minorVersion="1" resUnits="1" Xdensity="96"    Ydensity="96" thumbWidth="0" thumbHeight="0"/>
</JPEGvariety>
<markerSequence>
    <unknown MarkerTag="225"/>
    <unknown MarkerTag="225"/>
    <dqt>
        <dqtable elementPrecision="0" qtableId="0"/>
    </dqt>
    <dqt>
        <dqtable elementPrecision="0" qtableId="1"/>
    </dqt>
    <sof process="0" samplePrecision="8" numLines="239" samplesPerLine="211" numFrameComponents="3">
        <componentSpec componentId="1" HsamplingFactor="2" VsamplingFactor="2" QtableSelector="0"/>
        <componentSpec componentId="2" HsamplingFactor="1" VsamplingFactor="1" QtableSelector="1"/>
        <componentSpec componentId="3" HsamplingFactor="1" VsamplingFactor="1" QtableSelector="1"/>
    </sof>
    <dht>
        <dhtable class="0" htableId="0"/>
    </dht>
    <dht>
        <dhtable class="1" htableId="0"/>
    </dht>
    <dht>
        <dhtable class="0" htableId="1"/>
    </dht>
    <dht>
        <dhtable class="1" htableId="1"/>
    </dht>
    <sos numScanComponents="3" startSpectralSelection="0" endSpectralSelection="63" approxHigh="0" approxLow="0">
        <scanComponentSpec componentSelector="1" dcHuffTable="0" acHuffTable="0"/>
        <scanComponentSpec componentSelector="2" dcHuffTable="1" acHuffTable="1"/>
        <scanComponentSpec componentSelector="3" dcHuffTable="1" acHuffTable="1"/>
    </sos>
   </markerSequence>
</javax_imageio_jpeg_image_1.0>
Format name: javax_imageio_1.0
<javax_imageio_1.0>
<Chroma>
    <ColorSpaceType name="YCbCr"/>
    <NumChannels value="3"/>
</Chroma>
<Compression>
    <CompressionTypeName value="JPEG"/>
    <Lossless value="FALSE"/>
    <NumProgressiveScans value="1"/>
</Compression>
<Dimension>
    <PixelAspectRatio value="1.0"/>
    <ImageOrientation value="normal"/>
    <HorizontalPixelSize value="0.26458332"/>
    <VerticalPixelSize value="0.26458332"/>
</Dimension>
</javax_imageio_1.0>

2 个答案:

答案 0 :(得分:2)

你应该看看Apache Tika

答案 1 :(得分:0)

可以看看metadata-extractor。它允许您从JPEG和其他类型的图像文件中读取多种图像元数据。