使用Azure Blob存储的HDInsight

时间:2016-03-06 07:20:06

标签: c# azure azure-storage-blobs hdinsight cortana-intelligence

我一直试图让它工作一段时间,所以会很感激一些帮助。我使用以下内容:

  • HDInsight模拟器

  • 已将core-site.xml中的模拟器的默认文件系统设置为指向我的Azure存储帐户

  • 问题是在我部署到HDInsight集群(cluster)时开始出现的。映射器(mapper)可以顺利上传,但随后不知为何返回了HTTP 404,我不清楚原因。为了调试,我决定通过wasb让HDInsight模拟器指向我的Azure存储帐户,希望能得到更具描述性的错误信息。这样做之后,我得到的错误如下:java.io.IOException: Incomplete HDFS URI, no host: hdfs:///user/faheem/dotnetcli... 我知道默认文件系统确实已连接到Azure,因为浏览Azure存储时可以看到本地机器已经上传了映射器代码,但在那之后作业就失败了。当模拟器不使用Azure Blob存储(即使用原生HDFS文件系统)时,我的MapReduce代码可以完美运行。请帮忙!
  • 我的代码是一个hello world应用程序,如下所示

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using Microsoft.Hadoop.MapReduce;
    
    
    namespace ZA_Provinces_Vertices_Test
    {
        /// <summary>
        /// Console entry point that submits <see cref="MyCustomHadoopJob"/> through the
        /// Microsoft.Hadoop.MapReduce SDK, using either the Azure connection
        /// (<see cref="ConnectAzure"/>) or the local one (<see cref="ConnectLocal"/>),
        /// selected by <c>_cloudIndicator</c>.
        /// </summary>
        internal class Program
        {
            // Cluster connection settings (placeholders - fill in before connecting to Azure).
            // The cluster URL is kept as a string and converted to a Uri only inside
            // ConnectAzure: constructing new Uri("") in a static initializer throws
            // UriFormatException and prevents this type from loading at all, even when
            // only the local connection is used.
            private static readonly string _azureClusterUrl = "";
            private static readonly string _clusterUserName = "";
            private static readonly string _clusterPassword = "";
            private static readonly string _hadoopUserName = "";
    
            // Azure Storage information.
            private static readonly string _azureStorageAccount = "storage.blob.core.windows.net";
    
            private static readonly string _azureStorageKey =
                "{Key}";
    
            private static readonly string _azureStorageContainer = "storagecontainer";
            private static readonly bool _createContainerIfNotExist = false;
    
            // true: connect with ConnectAzure(); false: connect with ConnectLocal().
            private static readonly bool _cloudIndicator = false;
    
    
            /// <summary>
            /// Connects to Hadoop, runs <see cref="MyCustomHadoopJob"/> and reports the
            /// job's exit code on the console and as the process exit code.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            private static void Main(string[] args)
            {
                // For a dry run without any cluster, the mapper can instead be driven with
                // StreamingUnit.Execute<MyCustomMapper>(inputLines), using lines such as
                // "200000000|23.168|-27.504".
                IHadoop hadoop = _cloudIndicator ? ConnectAzure() : ConnectLocal();
    
                var result = hadoop.MapReduceJob.ExecuteJob<MyCustomHadoopJob>();
    
                // Surface the outcome instead of silently discarding the job result.
                int exitCode = result.Info.ExitCode;
                Console.WriteLine("MapReduce job finished with exit code {0}.", exitCode);
                Environment.ExitCode = exitCode;
            }
    
            /// <summary>
            /// Connects using the SDK's parameterless <c>Hadoop.Connect()</c> overload.
            /// </summary>
            /// <returns>The connection returned by <c>Hadoop.Connect()</c>.</returns>
            private static IHadoop ConnectLocal()
            {
                return Hadoop.Connect();
            }
    
            /// <summary>
            /// Connects to the cluster and storage account described by the static
            /// settings of this class.
            /// </summary>
            /// <returns>The connection returned by <c>Hadoop.Connect(...)</c>.</returns>
            /// <exception cref="InvalidOperationException">
            /// The configured cluster URL is empty or is not an absolute URI.
            /// </exception>
            private static IHadoop ConnectAzure()
            {
                Uri clusterUri;
                if (!Uri.TryCreate(_azureClusterUrl, UriKind.Absolute, out clusterUri))
                {
                    throw new InvalidOperationException(
                        "The cluster URL '" + _azureClusterUrl + "' is not a valid absolute URI. " +
                        "Set _azureClusterUrl before connecting to Azure.");
                }
    
                return Hadoop.Connect(clusterUri,
                    _clusterUserName,
                    _hadoopUserName,
                    _clusterPassword,
                    _azureStorageAccount,
                    _azureStorageKey,
                    _azureStorageContainer,
                    _createContainerIfNotExist);
            }
    
            /// <summary>
            /// Job configuration using paths relative to the default file system;
            /// the output folder is deleted before the job runs.
            /// </summary>
            /// <returns>A configuration reading "Input/sqrt" and writing "Output/sqrt".</returns>
            private static HadoopJobConfiguration LocalConfig()
            {
                return new HadoopJobConfiguration
                {
                    InputPath = "Input/sqrt",
                    OutputFolder = "Output/sqrt",
                    DeleteOutputFolder = true
                };
            }
    
            /// <summary>
            /// Job configuration using fully qualified wasb:// paths; the output folder
            /// is not deleted. The {container} and {storage} tokens are placeholders
            /// that must be replaced with real names.
            /// </summary>
            /// <returns>A configuration pointing at the ZAProvinceFromPoint input and output folders.</returns>
            private static HadoopJobConfiguration AzureConfig()
            {
                return new HadoopJobConfiguration
                {
                    InputPath =
                        "wasb://{container}@{storage}.blob.core.windows.net/user/faheem/ZAProvinceFromPoint/Input",
                    OutputFolder =
                        "wasb://{container}@{storage}.blob.core.windows.net/user/faheem/ZAProvinceFromPoint/Output",
                    DeleteOutputFolder = false
                };
            }
    
            /// <summary>
            /// Map-only job definition that runs <see cref="MyCustomMapper"/>.
            /// </summary>
            public class MyCustomHadoopJob : HadoopJob<MyCustomMapper>
            {
                /// <summary>
                /// Supplies the input/output configuration for the job.
                /// </summary>
                /// <param name="context">Executor context supplied by the SDK (not used).</param>
                /// <returns>The configuration built by <c>AzureConfig()</c>.</returns>
                public override HadoopJobConfiguration Configure(ExecutorContext context)
                {
                    // Always uses the wasb:// paths, regardless of _cloudIndicator.
                    // To follow the connection type instead, return
                    // _cloudIndicator ? AzureConfig() : LocalConfig().
                    return AzureConfig();
                }
            }
    
            /// <summary>
            /// Mapper that validates a pipe-delimited input line and emits the line
            /// with a label appended.
            /// </summary>
            public class MyCustomMapper : MapperBase
            {
                /// <summary>
                /// Parses the second and third '|'-separated fields as longitude and
                /// latitude, then emits "<paramref name="inputLine"/>|label" as the key
                /// with a null value.
                /// </summary>
                /// <param name="inputLine">
                /// One input record, e.g. "200000000|23.168|-27.504"
                /// (presumably id|longitude|latitude - confirm against the data set).
                /// </param>
                /// <param name="context">Context used to emit the output record.</param>
                /// <exception cref="FormatException">
                /// The line has fewer than three fields, or a coordinate is not a valid number.
                /// </exception>
                public override void Map(string inputLine, MapperContext context)
                {
                    string[] fields = inputLine.Split('|');
                    if (fields.Length < 3)
                    {
                        throw new FormatException(
                            "Expected at least 3 '|'-separated fields but got " + fields.Length +
                            " in line: " + inputLine);
                    }
    
                    // The data uses '.' as the decimal separator, so parse with the
                    // invariant culture; the machine's current culture may expect ','.
                    var invariant = System.Globalization.CultureInfo.InvariantCulture;
                    double longitude = double.Parse(fields[1], invariant);
                    double latitude = double.Parse(fields[2], invariant);
    
                    // Placeholder label until ProvinceShapeFile.PointInProvince(longitude, latitude)
                    // is wired in.
                    string label = "Dummy";
    
                    string processedLine = String.Format("{0}|{1}", inputLine, label);
    
                    context.EmitKeyValue(processedLine, null);
                }
            }
        }
    }
    

0 个答案:

没有答案