我在Spark的结构化编程中运行批处理。下面的代码片段抛出错误,提示“kafka is not a valid Spark SQL Data Source.;”(kafka不是有效的Spark SQL数据源)。我正在使用的依赖版本是:spark-sql-kafka-0-10_2.10。非常感谢您的帮助。谢谢。
// Batch read from Kafka through spark.read() (not readStream): subscribes to every
// topic whose name matches the pattern "test.*" and reads from the earliest
// available offsets up to the latest ones.
// The bootstrap server list is redacted ("*****") in this snippet.
Dataset<Row> df = spark
.read()
.format("kafka")
.option("kafka.bootstrap.servers", "*****")
.option("subscribePattern", "test.*")
.option("startingOffsets", "earliest")
.option("endingOffsets", "latest")
.load();
Exception in thread "main" org.apache.spark.sql.AnalysisException: kafka is not a valid Spark SQL Data Source.;
答案 0 :(得分:2)
我遇到了同样的问题。你和我当时一样,使用的是read而不是readStream。
将spark.read()
更改为spark.readStream
之后对我来说就正常了。
答案 1 :(得分:0)
使用
using ProjectTracker.Models;
using System.Collections.Generic;
using System;
using ProjectTracker.Data;
using System.Linq;
namespace ProjectTracker.Services
{
/// <summary>
/// Contract for storing and looking up <see cref="Project"/> instances.
/// </summary>
public interface IProjectData
{
    /// <summary>Adds a project.</summary>
    /// <param name="newProject">The project to add.</param>
    /// <returns>A <see cref="Project"/>; presumably the one that was added (confirm against the implementation).</returns>
    Project Add(Project newProject);

    /// <summary>Looks up a single project by its identifier.</summary>
    /// <param name="id">Identifier of the project.</param>
    Project Get(int id);

    /// <summary>Deletes the project with the given identifier.</summary>
    /// <param name="id">Identifier of the project.</param>
    void Delete(int id);

    /// <summary>Commits pending changes.</summary>
    void Commit();

    /// <summary>Returns the projects that are accepting participants.</summary>
    IEnumerable<Project> GetAcceptingParticipants();

    /// <summary>Returns the projects that are in progress.</summary>
    IEnumerable<Project> GetInProgress();

    /// <summary>Returns the project that the given user is part of.</summary>
    /// <param name="id">The user's id (not a project id).</param>
    Project ParticipatingIn(int id);
}
/// <summary>
/// <see cref="IProjectData"/> implementation that works against an
/// <see cref="ApplicationDbContext"/>.
/// </summary>
public class SqlProjectData : IProjectData
{
    // Set once in the constructor and never replaced afterwards.
    private readonly ApplicationDbContext _context;

    /// <summary>Creates the data service on top of the given context.</summary>
    /// <param name="context">Context used by every operation of this class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    public SqlProjectData(ApplicationDbContext context)
    {
        // Fail here rather than with a NullReferenceException on first use.
        if (context == null)
        {
            throw new ArgumentNullException(nameof(context));
        }

        _context = context;
    }

    /// <summary>
    /// Adds <paramref name="newProject"/> to the context and immediately calls
    /// <see cref="Commit"/>.
    /// </summary>
    /// <param name="newProject">The project to add.</param>
    /// <returns>The same instance that was passed in.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="newProject"/> is null.</exception>
    public Project Add(Project newProject)
    {
        if (newProject == null)
        {
            throw new ArgumentNullException(nameof(newProject));
        }

        _context.Add(newProject);
        Commit();
        return newProject;
    }

    /// <summary>Calls <c>SaveChanges</c> on the underlying context.</summary>
    public void Commit()
    {
        _context.SaveChanges();
    }

    /// <summary>
    /// Removes the project with the given id from the context; does nothing when
    /// <see cref="Get"/> finds no such project.
    /// </summary>
    /// <param name="id">Identifier of the project to remove.</param>
    public void Delete(int id)
    {
        var toBeDeleted = Get(id);
        if (toBeDeleted == null)
        {
            return;
        }

        // NOTE(review): unlike Add, this method does not call Commit, so the caller
        // has to call Commit() for the removal to be saved - confirm this is intended.
        _context.Remove<Project>(toBeDeleted);
    }

    /// <summary>Looks up a project by id.</summary>
    /// <param name="id">Identifier to match against <c>Project.Id</c>.</param>
    /// <returns>The first matching project, or the default value (null) when none matches.</returns>
    public Project Get(int id)
    {
        return _context.Project.FirstOrDefault(r => r.Id == id);
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public IEnumerable<Project> GetAcceptingParticipants()
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public IEnumerable<Project> GetInProgress()
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public Project ParticipatingIn(int id)
    {
        throw new NotImplementedException();
    }
}
}
机制,并将其传递给spark-submit。
请根据您自己的情况调整该库中Kafka、Scala和Spark的版本。