Reading Parquet Files from a Java Application

Recently I came across the requirement to read a Parquet file from a Java application, and I found that this is neither well documented nor easy to do. As a consequence I wrote this short tutorial. The first task is to add the Maven dependencies for the Parquet and Hadoop libraries that the imports in the code below come from.


Writing the Java application is easy once you know how to do it. Rather than the AvroParquetReader or ParquetReader classes that you frequently find when searching for a way to read Parquet files, use the class ParquetFileReader. The basic setup is to read all row groups and then read all groups recursively.

package de.jofre.test;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.convert.GroupRecordConverter;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.RecordReader;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
public class Main {
  private static Path path = new Path("file:\\C:\\myfile.snappy.parquet");
  private static void printGroup(Group g) {
    int fieldCount = g.getType().getFieldCount();
    for (int field = 0; field &lt; fieldCount; field++) {
      int valueCount = g.getFieldRepetitionCount(field);
      Type fieldType = g.getType().getType(field);
      String fieldName = fieldType.getName();
      for (int index = 0; index &lt; valueCount; index++) {
        if (fieldType.isPrimitive()) {
          System.out.println(fieldName + " " + g.getValueToString(field, index));
  public static void main(String[] args) throws IllegalArgumentException {
    Configuration conf = new Configuration();
    try {
      ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER);
      MessageType schema = readFooter.getFileMetaData().getSchema();
      ParquetFileReader r = new ParquetFileReader(conf, path, readFooter);
      PageReadStore pages = null;
      try {
        while (null != (pages = r.readNextRowGroup())) {
          final long rows = pages.getRowCount();
          System.out.println("Number of rows: " + rows);
          final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
          final RecordReader recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema));
          for (int i = 0; i &lt; rows; i++) {
            final Group g =;
            // TODO Compare to System.out.println(g);
      } finally {
    } catch (IOException e) {
      System.out.println("Error reading parquet file.");

Leave a Reply

Your email address will not be published. Required fields are marked *