package org.apache.iceberg.spark.actions;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.iceberg.NullOrder;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortDirection;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.SparkUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:org/apache/iceberg/spark/actions/SparkZOrderDataRewriter.class */
/**
 * Shuffling data rewriter that sorts rows by a temporary binary column ({@value #Z_COLUMN})
 * computed from the configured Z-order columns.
 *
 * <p>The temporary column is appended to the incoming dataset, the sort function supplied by the
 * parent rewriter is applied, and the column is dropped again before the result is returned.
 *
 * <p>Supported options, in addition to those accepted by {@link SparkShufflingDataRewriter}:
 *
 * <ul>
 *   <li>{@link #MAX_OUTPUT_SIZE} - positive int, defaults to {@link #MAX_OUTPUT_SIZE_DEFAULT}
 *   <li>{@link #VAR_LENGTH_CONTRIBUTION} - positive int, defaults to
 *       {@link #VAR_LENGTH_CONTRIBUTION_DEFAULT}
 * </ul>
 *
 * <p>NOTE(review): the decompiler reports that this class was originally package-private; the
 * widened visibility is kept here so existing callers keep compiling.
 */
public class SparkZOrderDataRewriter extends SparkShufflingDataRewriter {
    private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderDataRewriter.class);

    /** Name of the temporary column that carries the computed sort key. */
    private static final String Z_COLUMN = "ICEZVALUE";

    /** Single-column schema describing the temporary sort key column. */
    private static final Schema Z_SCHEMA = new Schema(Types.NestedField.required(0, Z_COLUMN, Types.BinaryType.get()));

    /** Sort order over the temporary column: ascending, nulls last. */
    private static final SortOrder Z_SORT_ORDER = SortOrder.builderFor(Z_SCHEMA).sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST).build();

    /** Option key for the maximum size passed to {@code SparkZOrderUDF}; must be positive. */
    public static final String MAX_OUTPUT_SIZE = "max-output-size";
    public static final int MAX_OUTPUT_SIZE_DEFAULT = Integer.MAX_VALUE;

    /** Option key for the variable-length contribution passed to {@code SparkZOrderUDF}; must be positive. */
    public static final String VAR_LENGTH_CONTRIBUTION = "var-length-contribution";
    public static final int VAR_LENGTH_CONTRIBUTION_DEFAULT = 8;

    private final List<String> zOrderColNames;

    // Both values are assigned in init(Map), not in the constructor.
    private int maxOutputSize;
    private int varLengthContribution;

    /**
     * Creates a rewriter for the given table and Z-order columns.
     *
     * @param sparkSession session used by the parent rewriter and to resolve case sensitivity
     * @param table table whose schema and partition spec are used to validate the columns
     * @param list requested Z-order column names; must be non-null and non-empty
     * @throws IllegalArgumentException if no columns are given, a column is not found in the table
     *     schema, or every column is an identity partition source column
     */
    public SparkZOrderDataRewriter(SparkSession sparkSession, Table table, List<String> list) {
        super(sparkSession, table);
        this.zOrderColNames = validZOrderColNames(sparkSession, table, list);
    }

    @Override
    public String description() {
        return "Z-ORDER";
    }

    /**
     * Returns the parent's valid options plus {@link #MAX_OUTPUT_SIZE} and
     * {@link #VAR_LENGTH_CONTRIBUTION}.
     */
    @Override
    public Set<String> validOptions() {
        return ImmutableSet.<String>builder()
                .addAll(super.validOptions())
                .add(MAX_OUTPUT_SIZE)
                .add(VAR_LENGTH_CONTRIBUTION)
                .build();
    }

    /**
     * Initializes the parent rewriter and then reads and validates the Z-order specific options.
     *
     * @param map rewrite options
     * @throws IllegalArgumentException if either Z-order option is not greater than zero
     */
    @Override
    public void init(Map<String, String> map) {
        super.init(map);
        this.maxOutputSize = maxOutputSize(map);
        this.varLengthContribution = varLengthContribution(map);
    }

    @Override
    protected SortOrder sortOrder() {
        return Z_SORT_ORDER;
    }

    /** Returns the table schema columns followed by the temporary {@value #Z_COLUMN} column. */
    @Override
    protected Schema sortSchema() {
        return new Schema(
                new ImmutableList.Builder<Types.NestedField>()
                        .addAll(table().schema().columns())
                        .addAll(Z_SCHEMA.columns())
                        .build());
    }

    /**
     * Adds the temporary sort key column, applies the supplied sort function, and drops the
     * temporary column from the result.
     *
     * @param dataset rows to sort
     * @param function sort function applied to the dataset that includes the temporary column
     * @return the output of {@code function} without the temporary column
     */
    @Override
    protected Dataset<Row> sortedDF(Dataset<Row> dataset, Function<Dataset<Row>, Dataset<Row>> function) {
        Dataset<Row> withZValue = dataset.withColumn(Z_COLUMN, zValue(dataset));
        return function.apply(withZValue).drop(Z_COLUMN);
    }

    /**
     * Builds the sort key expression: each configured column is looked up in the dataset schema
     * and passed through {@code SparkZOrderUDF.sortedLexicographically}; the resulting columns are
     * combined into an array and handed to {@code SparkZOrderUDF.interleaveBytes}.
     */
    private Column zValue(Dataset<Row> dataset) {
        SparkZOrderUDF zOrderUDF = new SparkZOrderUDF(this.zOrderColNames.size(), this.varLengthContribution, this.maxOutputSize);
        Column[] zOrderCols = this.zOrderColNames.stream()
                .map(dataset.schema()::apply)
                .map(field -> zOrderUDF.sortedLexicographically(dataset.col(field.name()), field.dataType()))
                .toArray(Column[]::new);
        return zOrderUDF.interleaveBytes(functions.array(zOrderCols));
    }

    /** Reads {@link #VAR_LENGTH_CONTRIBUTION} from the options and requires it to be positive. */
    private int varLengthContribution(Map<String, String> options) {
        int value = PropertyUtil.propertyAsInt(options, VAR_LENGTH_CONTRIBUTION, VAR_LENGTH_CONTRIBUTION_DEFAULT);
        Preconditions.checkArgument(
                value > 0,
                "Cannot use less than 1 byte for variable length types with ZOrder, '%s' was set to %s",
                VAR_LENGTH_CONTRIBUTION,
                value);
        return value;
    }

    /** Reads {@link #MAX_OUTPUT_SIZE} from the options and requires it to be positive. */
    private int maxOutputSize(Map<String, String> options) {
        int value = PropertyUtil.propertyAsInt(options, MAX_OUTPUT_SIZE, MAX_OUTPUT_SIZE_DEFAULT);
        Preconditions.checkArgument(
                value > 0,
                "Cannot have the interleaved ZOrder value use less than 1 byte, '%s' was set to %s",
                MAX_OUTPUT_SIZE,
                value);
        return value;
    }

    /**
     * Validates the requested column names against the table schema and filters out columns that
     * are identity partition sources, logging a warning for each one that is skipped.
     *
     * <p>Lookup honours the session's case sensitivity setting. Names are kept exactly as supplied
     * by the caller.
     *
     * @return the requested names that remain after filtering, in their original order
     * @throws IllegalArgumentException if the input is null or empty, a name cannot be resolved,
     *     or nothing remains after filtering
     */
    private List<String> validZOrderColNames(SparkSession sparkSession, Table table, List<String> inputColNames) {
        Preconditions.checkArgument(
                inputColNames != null && !inputColNames.isEmpty(),
                "Cannot ZOrder when no columns are specified");

        Schema schema = table.schema();
        Set<Integer> identitySourceIds = table.spec().identitySourceIds();
        boolean caseSensitive = SparkUtil.caseSensitive(sparkSession);

        List<String> validColNames = Lists.newArrayList();
        for (String colName : inputColNames) {
            Types.NestedField field = caseSensitive ? schema.findField(colName) : schema.caseInsensitiveFindField(colName);
            Preconditions.checkArgument(
                    field != null,
                    "Cannot find column '%s' in table schema (case sensitive = %s): %s",
                    colName,
                    caseSensitive,
                    schema.asStruct());

            if (identitySourceIds.contains(field.fieldId())) {
                LOG.warn("Ignoring '{}' as such values are constant within a partition", colName);
            } else {
                validColNames.add(colName);
            }
        }

        Preconditions.checkArgument(
                !validColNames.isEmpty(),
                "Cannot ZOrder, all columns provided were identity partition columns and cannot be used");
        return validColNames;
    }
}
