public class SQLContext extends java.lang.Object implements Logging, scala.Serializable
The entry point for working with structured data (rows and columns) in Spark. Allows the creation of DataFrame objects as well as the execution of SQL queries.
Modifier and Type | Class and Description
---|---
class | SQLContext.implicits$ :: Experimental :: (Scala-specific) Implicit methods available in Scala for converting common Scala objects into DataFrames.
protected class | SQLContext.QueryExecution
protected class | SQLContext.SparkPlanner
Constructor and Description
---
SQLContext(JavaSparkContext sparkContext)
SQLContext(SparkContext sparkContext)
Modifier and Type | Method and Description
---|---
protected void | addJar(java.lang.String path) Add a jar to SQLContext
protected org.apache.spark.sql.catalyst.analysis.Analyzer | analyzer()
DataFrame | applySchema(JavaRDD<?> rdd, java.lang.Class<?> beanClass)
DataFrame | applySchema(JavaRDD<Row> rowRDD, StructType schema)
DataFrame | applySchema(RDD<?> rdd, java.lang.Class<?> beanClass)
DataFrame | applySchema(RDD<Row> rowRDD, StructType schema)
protected DataFrame | applySchemaToPythonRDD(RDD<java.lang.Object[]> rdd, java.lang.String schemaString)
protected DataFrame | applySchemaToPythonRDD(RDD<java.lang.Object[]> rdd, StructType schema)
DataFrame | baseRelationToDataFrame(BaseRelation baseRelation)
protected org.apache.spark.sql.execution.CacheManager | cacheManager()
void | cacheTable(java.lang.String tableName) Caches the specified table in-memory.
protected org.apache.spark.sql.catalyst.analysis.Catalog | catalog()
static void | clearActive() Clears the active SQLContext for current thread.
void | clearCache() Removes all cached tables from the in-memory cache.
protected org.apache.spark.sql.SQLConf | conf()
DataFrame | createDataFrame(JavaRDD<?> rdd, java.lang.Class<?> beanClass)
DataFrame | createDataFrame(JavaRDD<Row> rowRDD, StructType schema)
DataFrame | createDataFrame(java.util.List<?> data, java.lang.Class<?> beanClass)
DataFrame | createDataFrame(java.util.List<Row> rows, StructType schema)
DataFrame | createDataFrame(RDD<?> rdd, java.lang.Class<?> beanClass)
<A extends scala.Product> DataFrame | createDataFrame(RDD<A> rdd, scala.reflect.api.TypeTags.TypeTag<A> evidence$1)
DataFrame | createDataFrame(RDD<Row> rowRDD, StructType schema)
<A extends scala.Product> DataFrame | createDataFrame(scala.collection.Seq<A> data, scala.reflect.api.TypeTags.TypeTag<A> evidence$2)
<T> Dataset<T> | createDataset(java.util.List<T> data, Encoder<T> evidence$5)
<T> Dataset<T> | createDataset(RDD<T> data, Encoder<T> evidence$4)
<T> Dataset<T> | createDataset(scala.collection.Seq<T> data, Encoder<T> evidence$3)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String path)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String source, java.util.Map<java.lang.String,java.lang.String> options)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String source, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String path, java.lang.String source)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String source, StructType schema, java.util.Map<java.lang.String,java.lang.String> options)
DataFrame | createExternalTable(java.lang.String tableName, java.lang.String source, StructType schema, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
protected org.apache.spark.sql.execution.datasources.DDLParser | ddlParser()
protected java.lang.String | dialectClassName()
void | dropTempTable(java.lang.String tableName)
DataFrame | emptyDataFrame() :: Experimental :: Returns a DataFrame with no rows or columns.
protected RDD<org.apache.spark.sql.catalyst.InternalRow> | emptyResult()
protected org.apache.spark.sql.execution.QueryExecution | executePlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan)
protected org.apache.spark.sql.execution.QueryExecution | executeSql(java.lang.String sql)
ExperimentalMethods | experimental() :: Experimental :: A collection of methods that are considered experimental, but can be used to hook into the query planner for advanced functionality.
protected org.apache.spark.sql.catalyst.analysis.FunctionRegistry | functionRegistry()
scala.collection.immutable.Map<java.lang.String,java.lang.String> | getAllConfs() Return all the configuration properties that have been set (i.e.
java.lang.String | getConf(java.lang.String key) Return the value of Spark SQL configuration property for the given key.
java.lang.String | getConf(java.lang.String key, java.lang.String defaultValue) Return the value of Spark SQL configuration property for the given key.
static SQLContext | getOrCreate(SparkContext sparkContext) Get the singleton SQLContext if it exists or create a new one using the given SparkContext.
protected scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.AttributeReference> | getSchema(java.lang.Class<?> beanClass)
protected org.apache.spark.sql.catalyst.ParserDialect | getSQLDialect()
SQLContext.implicits$ | implicits() Accessor for nested Scala object
boolean | isCached(java.lang.String tableName) Returns true if the table is currently cached in-memory.
boolean | isRootContext()
DataFrame | jdbc(java.lang.String url, java.lang.String table) Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
DataFrame | jdbc(java.lang.String url, java.lang.String table, java.lang.String[] theParts) Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
DataFrame | jdbc(java.lang.String url, java.lang.String table, java.lang.String columnName, long lowerBound, long upperBound, int numPartitions) Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
DataFrame | jsonFile(java.lang.String path) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonFile(java.lang.String path, double samplingRatio) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonFile(java.lang.String path, StructType schema) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(JavaRDD<java.lang.String> json) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(JavaRDD<java.lang.String> json, double samplingRatio) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(JavaRDD<java.lang.String> json, StructType schema) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(RDD<java.lang.String> json) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(RDD<java.lang.String> json, double samplingRatio) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
DataFrame | jsonRDD(RDD<java.lang.String> json, StructType schema) Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
org.apache.spark.sql.execution.ui.SQLListener | listener()
ExecutionListenerManager | listenerManager()
DataFrame | load(java.lang.String path) Deprecated. As of 1.4.0, replaced by read().load(path). This will be removed in Spark 2.0.
DataFrame | load(java.lang.String source, java.util.Map<java.lang.String,java.lang.String> options) Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load(). This will be removed in Spark 2.0.
DataFrame | load(java.lang.String source, scala.collection.immutable.Map<java.lang.String,java.lang.String> options) Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
DataFrame | load(java.lang.String path, java.lang.String source) Deprecated. As of 1.4.0, replaced by read().format(source).load(path). This will be removed in Spark 2.0.
DataFrame | load(java.lang.String source, StructType schema, java.util.Map<java.lang.String,java.lang.String> options) Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
DataFrame | load(java.lang.String source, StructType schema, scala.collection.immutable.Map<java.lang.String,java.lang.String> options) Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
SQLContext | newSession() Returns a SQLContext as new session, with separated SQL configurations, temporary tables, registered functions, but sharing the same SparkContext, CacheManager, SQLListener and SQLTab.
protected org.apache.spark.sql.catalyst.optimizer.Optimizer | optimizer()
DataFrame | parquetFile(scala.collection.Seq<java.lang.String> paths)
DataFrame | parquetFile(java.lang.String... paths) Deprecated. As of 1.4.0, replaced by read().parquet(). This will be removed in Spark 2.0.
protected DataType | parseDataType(java.lang.String dataTypeString)
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | parseSql(java.lang.String sql)
protected org.apache.spark.sql.execution.SparkPlanner | planner()
protected org.apache.spark.sql.catalyst.rules.RuleExecutor<org.apache.spark.sql.execution.SparkPlan> | prepareForExecution()
DataFrame | range(long end)
DataFrame | range(long start, long end)
DataFrame | range(long start, long end, long step, int numPartitions)
DataFrameReader | read()
static void | setActive(SQLContext sqlContext) Changes the SQLContext that will be returned in this thread and its children when SQLContext.getOrCreate() is called.
void | setConf(java.util.Properties props) Set Spark SQL configuration properties.
void | setConf(java.lang.String key, java.lang.String value) Set the given Spark SQL configuration property.
SparkContext | sparkContext()
DataFrame | sql(java.lang.String sqlText)
protected org.apache.spark.sql.execution.SparkSQLParser | sqlParser()
DataFrame | table(java.lang.String tableName)
java.lang.String[] | tableNames()
java.lang.String[] | tableNames(java.lang.String databaseName)
DataFrame | tables()
DataFrame | tables(java.lang.String databaseName)
UDFRegistration | udf() A collection of methods for registering user-defined functions (UDF).
void | uncacheTable(java.lang.String tableName) Removes the specified table from the in-memory cache.
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.spark.Logging: initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
public SQLContext(SparkContext sparkContext)

public SQLContext(JavaSparkContext sparkContext)

public static SQLContext getOrCreate(SparkContext sparkContext)
Get the singleton SQLContext if it exists or create a new one using the given SparkContext.
This function can be used to create a singleton SQLContext object that can be shared across the JVM.
If there is an active SQLContext for the current thread, it will be returned instead of the global one.
Parameters: sparkContext - (undocumented)

public static void setActive(SQLContext sqlContext)
Changes the SQLContext that will be returned in this thread and its children when SQLContext.getOrCreate() is called.
Parameters: sqlContext - (undocumented)

public static void clearActive()
Clears the active SQLContext for the current thread.
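For illustration, a minimal Scala sketch of the singleton and thread-local behavior described above (the SparkConf settings are assumed values, not part of this API):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

val sc = new SparkContext(new SparkConf().setAppName("example").setMaster("local[*]"))

// First call creates the JVM-wide singleton; later calls return it.
val sqlContext = SQLContext.getOrCreate(sc)

// Pin an isolated session to the current thread, then undo it.
val session = sqlContext.newSession()
SQLContext.setActive(session)
// getOrCreate now returns `session` on this thread.
SQLContext.clearActive()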
public DataFrame parquetFile(java.lang.String... paths)
Deprecated. As of 1.4.0, replaced by read().parquet(). This will be removed in Spark 2.0.
Loads a Parquet file, returning the result as a DataFrame. This function returns an empty DataFrame if no paths are passed in.
Parameters: paths - (undocumented)

public SparkContext sparkContext()
protected org.apache.spark.sql.execution.CacheManager cacheManager()
public org.apache.spark.sql.execution.ui.SQLListener listener()
public boolean isRootContext()
public SQLContext newSession()
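newSession() is described in the summary above as sharing the SparkContext while isolating configuration and temporary tables; a small hedged Scala sketch (sc is an assumed existing SparkContext):

val base = SQLContext.getOrCreate(sc)
val session = base.newSession()

// SQL configurations are per-session.
session.setConf("spark.sql.shuffle.partitions", "4")
base.getConf("spark.sql.shuffle.partitions", "200")   // base is unaffected; returns the supplied default

// Temporary tables are per-session too; "local_nums" is only visible in `session`.
session.range(10).registerTempTable("local_nums")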
protected org.apache.spark.sql.SQLConf conf()
public void setConf(java.util.Properties props)
Set Spark SQL configuration properties.
Parameters: props - (undocumented)

public void setConf(java.lang.String key, java.lang.String value)
Set the given Spark SQL configuration property.
Parameters: key - (undocumented), value - (undocumented)

public java.lang.String getConf(java.lang.String key)
Return the value of Spark SQL configuration property for the given key.
Parameters: key - (undocumented)

public java.lang.String getConf(java.lang.String key, java.lang.String defaultValue)
Return the value of Spark SQL configuration property for the given key. If the key is not set yet, return defaultValue.
Parameters: key - (undocumented), defaultValue - (undocumented)

public scala.collection.immutable.Map<java.lang.String,java.lang.String> getAllConfs()
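A short, illustrative Scala sketch of the configuration accessors (the property names and values are examples only):

// assumes an existing SQLContext `sqlContext`
sqlContext.setConf("spark.sql.shuffle.partitions", "8")
sqlContext.getConf("spark.sql.shuffle.partitions")            // "8"
sqlContext.getConf("spark.sql.some.unset.key", "fallback")    // "fallback"
sqlContext.getAllConfs.foreach { case (k, v) => println(s"$k=$v") }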
public ExecutionListenerManager listenerManager()
protected org.apache.spark.sql.catalyst.analysis.Catalog catalog()
protected org.apache.spark.sql.catalyst.analysis.FunctionRegistry functionRegistry()
protected org.apache.spark.sql.catalyst.analysis.Analyzer analyzer()
protected org.apache.spark.sql.catalyst.optimizer.Optimizer optimizer()
protected org.apache.spark.sql.execution.datasources.DDLParser ddlParser()
protected org.apache.spark.sql.execution.SparkSQLParser sqlParser()
protected org.apache.spark.sql.catalyst.ParserDialect getSQLDialect()
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan parseSql(java.lang.String sql)
protected org.apache.spark.sql.execution.QueryExecution executeSql(java.lang.String sql)
protected org.apache.spark.sql.execution.QueryExecution executePlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan)
protected java.lang.String dialectClassName()
protected void addJar(java.lang.String path)
Add a jar to SQLContext.
Parameters: path - (undocumented)

public ExperimentalMethods experimental()
:: Experimental ::
A collection of methods that are considered experimental, but can be used to hook into the query planner for advanced functionality.
public DataFrame emptyDataFrame()
:: Experimental ::
Returns a DataFrame with no rows or columns.
public UDFRegistration udf()
A collection of methods for registering user-defined functions (UDF).
The following example registers a Scala closure as UDF:
sqlContext.udf.register("myUDF", (arg1: Int, arg2: String) => arg2 + arg1)
The following example registers a UDF in Java:
sqlContext.udf().register("myUDF",
  new UDF2<Integer, String, String>() {
    @Override
    public String call(Integer arg1, String arg2) {
      return arg2 + arg1;
    }
  }, DataTypes.StringType);
Or, to use Java 8 lambda syntax:
sqlContext.udf().register("myUDF",
(Integer arg1, String arg2) -> arg2 + arg1,
DataTypes.StringType);
public boolean isCached(java.lang.String tableName)
Returns true if the table is currently cached in-memory.
Parameters: tableName - (undocumented)

public void cacheTable(java.lang.String tableName)
Caches the specified table in-memory.
Parameters: tableName - (undocumented)

public void uncacheTable(java.lang.String tableName)
Removes the specified table from the in-memory cache.
Parameters: tableName - (undocumented)

public void clearCache()
Removes all cached tables from the in-memory cache.
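A minimal Scala sketch of the caching calls above (the table name is illustrative):

val df = sqlContext.range(100)
df.registerTempTable("events")

sqlContext.cacheTable("events")
sqlContext.isCached("events")      // true
sqlContext.uncacheTable("events")
sqlContext.clearCache()            // drops every remaining cached table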
public SQLContext.implicits$ implicits()
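The nested implicits object is usually brought into scope with an import; a small sketch (the case class and data are hypothetical, and `sc` is an assumed existing SparkContext):

import sqlContext.implicits._

case class Person(name: String, age: Int)
val people = sc.parallelize(Seq(Person("Alice", 29), Person("Bob", 31))).toDF()
people.printSchema()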
public <A extends scala.Product> DataFrame createDataFrame(RDD<A> rdd, scala.reflect.api.TypeTags.TypeTag<A> evidence$1)
public <A extends scala.Product> DataFrame createDataFrame(scala.collection.Seq<A> data, scala.reflect.api.TypeTags.TypeTag<A> evidence$2)
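These two overloads derive the schema from a Product type (typically a case class) via reflection; a brief sketch with a hypothetical case class:

case class Record(key: Int, value: String)

val data = Seq(Record(1, "a"), Record(2, "b"))
val fromSeq = sqlContext.createDataFrame(data)                    // Seq[A] overload
val fromRdd = sqlContext.createDataFrame(sc.parallelize(data))    // RDD[A] overload
fromSeq.printSchema()   // key: integer, value: string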
public DataFrame baseRelationToDataFrame(BaseRelation baseRelation)
public DataFrame createDataFrame(RDD<Row> rowRDD, StructType schema)
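When the schema is not known at compile time, it can be supplied explicitly as a StructType; an illustrative sketch:

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val schema = StructType(Seq(
  StructField("name", StringType, nullable = true),
  StructField("age", IntegerType, nullable = true)))
val rows = sc.parallelize(Seq(Row("Alice", 29), Row("Bob", 31)))
val people = sqlContext.createDataFrame(rows, schema)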
public <T> Dataset<T> createDataset(scala.collection.Seq<T> data, Encoder<T> evidence$3)
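The Dataset variants need an implicit Encoder, which sqlContext.implicits._ provides for common types; an illustrative sketch:

import sqlContext.implicits._   // supplies Encoder[Int], Encoder[String], etc.

val ds = sqlContext.createDataset(Seq(1, 2, 3))
val doubled = ds.map(_ * 2)     // Dataset[Int]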
public DataFrame createDataFrame(JavaRDD<Row> rowRDD, StructType schema)
public DataFrame createDataFrame(java.util.List<Row> rows, StructType schema)
public DataFrame createDataFrame(java.util.List<?> data, java.lang.Class<?> beanClass)
public DataFrameReader read()
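read() returns a DataFrameReader, which is the non-deprecated path for loading external data; a small sketch with placeholder paths:

val parquetDF = sqlContext.read.parquet("/path/to/data.parquet")
val jsonDF = sqlContext.read.json("/path/to/data.json")
val generic = sqlContext.read.format("parquet").load("/path/to/data.parquet")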
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String path)
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String path, java.lang.String source)
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String source, java.util.Map<java.lang.String,java.lang.String> options)
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String source, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String source, StructType schema, java.util.Map<java.lang.String,java.lang.String> options)
public DataFrame createExternalTable(java.lang.String tableName, java.lang.String source, StructType schema, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
public void dropTempTable(java.lang.String tableName)
public DataFrame range(long end)
public DataFrame range(long start, long end)
public DataFrame range(long start, long end, long step, int numPartitions)
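range produces a DataFrame with a single id column of type long; for example:

sqlContext.range(5).show()                 // ids 0 through 4
sqlContext.range(0, 100, 10, 2).count()    // 10 rows, in 2 partitions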
public DataFrame sql(java.lang.String sqlText)
public DataFrame table(java.lang.String tableName)
public DataFrame tables()
public DataFrame tables(java.lang.String databaseName)
public java.lang.String[] tableNames()
public java.lang.String[] tableNames(java.lang.String databaseName)
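A short sketch tying together temporary table registration, sql, table and tableNames (the table name is illustrative):

sqlContext.range(10).registerTempTable("nums")

val evens = sqlContext.sql("SELECT id FROM nums WHERE id % 2 = 0")
val sameTable = sqlContext.table("nums")    // DataFrame for the registered table
sqlContext.tableNames().foreach(println)    // includes "nums"
sqlContext.tables().show()                  // tableName / isTemporary columns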
protected org.apache.spark.sql.execution.SparkPlanner planner()
protected RDD<org.apache.spark.sql.catalyst.InternalRow> emptyResult()
protected org.apache.spark.sql.catalyst.rules.RuleExecutor<org.apache.spark.sql.execution.SparkPlan> prepareForExecution()
protected DataType parseDataType(java.lang.String dataTypeString)
protected DataFrame applySchemaToPythonRDD(RDD<java.lang.Object[]> rdd, java.lang.String schemaString)
protected DataFrame applySchemaToPythonRDD(RDD<java.lang.Object[]> rdd, StructType schema)
protected scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.AttributeReference> getSchema(java.lang.Class<?> beanClass)
public DataFrame applySchema(RDD<Row> rowRDD, StructType schema)
public DataFrame applySchema(JavaRDD<Row> rowRDD, StructType schema)
public DataFrame parquetFile(scala.collection.Seq<java.lang.String> paths)
public DataFrame jsonFile(java.lang.String path)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads a JSON file (one object per line), returning the result as a DataFrame.
It goes through the entire dataset once to determine the schema.
Parameters: path - (undocumented)

public DataFrame jsonFile(java.lang.String path, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads a JSON file (one object per line) and applies the given schema, returning the result as a DataFrame.
Parameters: path - (undocumented), schema - (undocumented)

public DataFrame jsonFile(java.lang.String path, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Parameters: path - (undocumented), samplingRatio - (undocumented)

public DataFrame jsonRDD(RDD<java.lang.String> json)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
It goes through the entire dataset once to determine the schema.
Parameters: json - (undocumented)

public DataFrame jsonRDD(JavaRDD<java.lang.String> json)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
It goes through the entire dataset once to determine the schema.
Parameters: json - (undocumented)

public DataFrame jsonRDD(RDD<java.lang.String> json, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a DataFrame.
Parameters: json - (undocumented), schema - (undocumented)

public DataFrame jsonRDD(JavaRDD<java.lang.String> json, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a DataFrame.
Parameters: json - (undocumented), schema - (undocumented)

public DataFrame jsonRDD(RDD<java.lang.String> json, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
Parameters: json - (undocumented), samplingRatio - (undocumented)

public DataFrame jsonRDD(JavaRDD<java.lang.String> json, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json(). This will be removed in Spark 2.0.
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
Parameters: json - (undocumented), samplingRatio - (undocumented)
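Since all of the jsonFile and jsonRDD variants are deprecated, the equivalent DataFrameReader calls look roughly like this (paths and data are placeholders, and `schema` is assumed to be a StructType defined elsewhere):

val fromFile = sqlContext.read.json("/path/to/data.json")
val fromRdd = sqlContext.read.json(sc.parallelize(Seq("""{"a": 1}""", """{"a": 2}""")))
val withSchema = sqlContext.read.schema(schema).json("/path/to/data.json")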
public DataFrame load(java.lang.String path)
Deprecated. As of 1.4.0, replaced by read().load(path). This will be removed in Spark 2.0.
Parameters: path - (undocumented)

public DataFrame load(java.lang.String path, java.lang.String source)
Deprecated. As of 1.4.0, replaced by read().format(source).load(path). This will be removed in Spark 2.0.
Parameters: path - (undocumented), source - (undocumented)

public DataFrame load(java.lang.String source, java.util.Map<java.lang.String,java.lang.String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load(). This will be removed in Spark 2.0.
Parameters: source - (undocumented), options - (undocumented)

public DataFrame load(java.lang.String source, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
Parameters: source - (undocumented), options - (undocumented)

public DataFrame load(java.lang.String source, StructType schema, java.util.Map<java.lang.String,java.lang.String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
Parameters: source - (undocumented), schema - (undocumented), options - (undocumented)

public DataFrame load(java.lang.String source, StructType schema, scala.collection.immutable.Map<java.lang.String,java.lang.String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
Parameters: source - (undocumented), schema - (undocumented), options - (undocumented)
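The non-deprecated equivalent of the load overloads goes through DataFrameReader; a sketch with placeholder source and options (`schema` is assumed to be a StructType defined elsewhere):

val df = sqlContext.read
  .format("json")
  .options(Map("path" -> "/path/to/data.json"))
  .load()

val withSchema = sqlContext.read.format("json").schema(schema).load("/path/to/data.json")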
public DataFrame jdbc(java.lang.String url, java.lang.String table)
Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
Construct a DataFrame representing the database table accessible via JDBC URL url named table.
Parameters: url - (undocumented), table - (undocumented)

public DataFrame jdbc(java.lang.String url, java.lang.String table, java.lang.String columnName, long lowerBound, long upperBound, int numPartitions)
Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function.
Parameters:
columnName - the name of a column of integral type that will be used for partitioning.
lowerBound - the minimum value of columnName used to decide partition stride
upperBound - the maximum value of columnName used to decide partition stride
numPartitions - the number of partitions. The range lowerBound to upperBound will be split evenly into this many partitions.
url - (undocumented)
table - (undocumented)

public DataFrame jdbc(java.lang.String url, java.lang.String table, java.lang.String[] theParts)
Deprecated. As of 1.4.0, replaced by read().jdbc(). This will be removed in Spark 2.0.
Construct a DataFrame representing the database table accessible via JDBC URL url named table. The theParts parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame.
Parameters: url - (undocumented), table - (undocumented), theParts - (undocumented)
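For reference, the non-deprecated DataFrameReader equivalents of these jdbc overloads look roughly like this (the JDBC URL, table, column names and credentials are placeholders):

import java.util.Properties

val props = new Properties()
props.setProperty("user", "sa")

// Whole-table read.
val whole = sqlContext.read.jdbc("jdbc:h2:mem:testdb", "PEOPLE", props)

// Partitioned read on an integral column.
val partitioned = sqlContext.read.jdbc(
  "jdbc:h2:mem:testdb", "PEOPLE", "ID",
  /* lowerBound = */ 0L, /* upperBound = */ 1000L, /* numPartitions = */ 4, props)

// One partition per WHERE-clause predicate.
val byPredicate = sqlContext.read.jdbc(
  "jdbc:h2:mem:testdb", "PEOPLE", Array("AGE < 30", "AGE >= 30"), props)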