Error: Spark 分桶保存 Hive 失敗 (writing a bucketed table from Spark to Hive fails)
Output Hive table XXX is bucketed but Spark currently does NOT populate bucketed output which is compatible with Hive;
Source code that throws this error:
// Guard applied before writing into a Hive table that declares a bucket spec.
// The write is rejected with an AnalysisException unless BOTH "hive.enforce.bucketing"
// and "hive.enforce.sorting" are explicitly set to false in the Hadoop configuration;
// in that case the insert proceeds and only a warning is logged.
table.bucketSpec match {
  case Some(_) =>
    val bucketingKey = "hive.enforce.bucketing"
    val sortingKey = "hive.enforce.sorting"
    val notCompatibleMsg = s"Output Hive table ${table.identifier} is bucketed but Spark " +
      "currently does NOT populate bucketed output which is compatible with Hive."

    // A key that is absent from the configuration is read as "true" (enforcement on).
    def isEnforced(key: String): Boolean = hadoopConf.get(key, "true").toBoolean

    // `&&` short-circuits: the sorting key is only read when bucketing is not enforced.
    val bothDisabled = !isEnforced(bucketingKey) && !isEnforced(sortingKey)
    if (bothDisabled) {
      logWarning(notCompatibleMsg + s" Inserting data anyways since both $bucketingKey and " +
        s"$sortingKey are set to false.")
    } else {
      throw new AnalysisException(notCompatibleMsg)
    }

  case _ =>
    // Table is not bucketed: nothing to verify.
}