Hive:无法从 Google Cloud Platform 读取文件

时间:2019-05-10 20:21:24

标签: java maven hive google-cloud-storage nosuchmethoderror

我正在尝试在 Hive 查询中读取存放于 GCP 存储桶中的文件。

基本上,我要做的就是

import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.StorageOptions;

// Build a client from the default StorageOptions.
Storage storage = StorageOptions.getDefaultInstance().getService();
// Look the object up by bucket and object name.
BlobId objectId = BlobId.of(bucketName, srcFilename);
Blob object = storage.get(objectId);
// Pull the object's bytes and decode them with the platform default charset.
byte[] rawBytes = object.getContent();
return new String(rawBytes);

现在,当我在Mac上运行此程序时,它可以工作(我以可以访问存储桶的方式进行了gcloud设置)

现在,我希望以 Hive UDF 的形式实现相同的功能。所以,我构建了一个非常简单的 jar

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;     
import org.apache.hadoop.hive.ql.udf.UDFType;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.StorageOptions;

/**
 * Hive UDF that returns the content of an object stored in a Google Cloud Storage bucket.
 */
@UDFType(deterministic = true)
public class MyAwesomeUDF extends GenericUDF{

    /**
     * Downloads an object from Cloud Storage and returns its content as text.
     *
     * @param srcFilename name of the object inside the bucket
     * @param bucketName  name of the bucket that holds the object
     * @return the object's bytes decoded as UTF-8
     * @throws IOException if the bucket contains no object with the given name
     */
    @Override
    public String process(String srcFilename, String bucketName) throws IOException {
        Storage storage = StorageOptions.getDefaultInstance().getService();
        Blob blob = storage.get(BlobId.of(bucketName, srcFilename));
        // Storage.get yields null for a missing object; report that explicitly
        // instead of letting getContent() fail with a NullPointerException.
        if (blob == null) {
            throw new IOException("No such object: gs://" + bucketName + "/" + srcFilename);
        }
        // Decode with an explicit charset so the result does not depend on the
        // default charset of whichever machine runs the query.
        return new String(blob.getContent(), java.nio.charset.StandardCharsets.UTF_8);
    }

}

这是我的pom.xml

<!-- Libraries the UDF is compiled against. -->
<dependencies>
        <!-- https://mvnrepository.com/artifact/com.google.cloud/google-cloud-storage -->
        <!-- Google Cloud Storage client, pinned to 1.71.0. -->
        <dependency>
            <groupId>com.google.cloud</groupId>
            <artifactId>google-cloud-storage</artifactId>
            <version>1.71.0</version>
        </dependency>
        <!-- Hive dependency, pinned to 1.2.1; both log4j artifacts are excluded from what it pulls in. -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-serde</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
    <!-- Build plugins: the shade plugin runs in the package phase and writes a jar named
         hive-exe-jar-with-dependencies. -->
    <plugins>
<plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <finalName>hive-exe-jar-with-dependencies</finalName>
                            <!-- Drop signature files (.SF, .DSA, .RSA) from every artifact; presumably so
                                 signatures of the original jars do not invalidate the merged jar. -->
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <!-- Move com.google.common classes under repackaged.com.google.common.
                                 NOTE(review): only com.google.common is relocated. The class named in the
                                 reported NoSuchMethodError lives under com.google.api.services.storage and
                                 is not relocated here. TODO confirm whether a different version of that
                                 package on the runtime classpath is loaded instead of the bundled one. -->
                            <relocations>
                                <relocation>
                                    <pattern>com.google.common</pattern>
                                    <shadedPattern>repackaged.com.google.common</shadedPattern>
                                </relocation>
                            </relocations>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
    </plugins>

接下来,我构建了这个jar,可以在VM上运行它。

最后,这是我要运行的 Hive 查询

-- Make the UDF jar available to this Hive session.
add jar /path/to/my/awesome/jar;
use myDb;

-- Register the UDF class under a temporary function name.
create temporary function awesome_fun as 'package.path.to.my.MyAwesomeUDF';

-- Arguments follow the UDF's declared parameter order: object name first, bucket name second.
select
    awesome_fun('srcFileName', 'bucketName');

但是我得到了如下错误:

Exception in thread "main" java.lang.NoSuchMethodError: com.google.api.services.storage.Storage$Objects$Get.setUserProject(Ljava/lang/String;)Lcom/google/api/services/storage/Storage$Objects$Get;
    at com.google.cloud.storage.spi.v1.HttpStorageRpc.getCall(HttpStorageRpc.java:403)
    at com.google.cloud.storage.spi.v1.HttpStorageRpc.get(HttpStorageRpc.java:411)
    at com.google.cloud.storage.StorageImpl$5.call(StorageImpl.java:198)
    at com.google.cloud.storage.StorageImpl$5.call(StorageImpl.java:195)
    at com.google.api.gax.retrying.DirectRetryingExecutor.submit(DirectRetryingExecutor.java:89)
    at com.google.cloud.RetryHelper.run(RetryHelper.java:74)
    at com.google.cloud.RetryHelper.runWithRetries(RetryHelper.java:51)
    at com.google.cloud.storage.StorageImpl.get(StorageImpl.java:195)
    at com.google.cloud.storage.StorageImpl.get(StorageImpl.java:209)

错误发生在

Storage storage = StorageOptions.getDefaultInstance().getService();

此外,在构建 jar 之后,我可以看到(使用 `jar -tf`)`com.google.api.services.storage.Storage$Objects$Get` 确实存在于 jar 中。

我做错了什么?

1 个答案:

答案 0 :(得分:0)

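问题在于运行时找不到被调用的方法:`NoSuchMethodError` 表示类本身存在,但运行时实际加载的类版本与编译时所依赖的版本不一致(此处缺少的是 `Storage$Objects$Get.setUserProject`)。请确认编译时使用的库版本与运行时类路径上的库版本相同,并确保实际运行的类文件是最新的。在本例中,很可能是 Hive/Hadoop 的类路径上已有较旧版本的 google-api-services-storage,并且先于 jar 中打包的版本被加载;可以尝试在 maven-shade-plugin 中像处理 `com.google.common` 那样,把 `com.google.api` 等包也一并重定位。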