pixelmed
Parse.java
Go to the documentation of this file.
1 /* Copyright (c) 2014-2015, David A. Clunie DBA Pixelmed Publishing. All rights reserved. */
2 
3 package com.pixelmed.codec.jpeg;
4 
5 import java.awt.Rectangle;
6 import java.awt.Shape;
7 
8 import java.io.ByteArrayOutputStream;
9 import java.io.File;
10 import java.io.FileInputStream;
11 import java.io.FileOutputStream;
12 import java.io.InputStream;
13 import java.io.IOException;
14 import java.io.OutputStream;
15 
16 import java.nio.ByteOrder;
17 
18 import java.util.HashMap;
19 import java.util.Map;
20 import java.util.Vector;
21 
33 public class Parse {
34 
35  private static final String identString = "@(#) $Header: /userland/cvs/codec/com/pixelmed/codec/jpeg/Parse.java,v 1.18 2017/03/21 17:42:24 dclunie Exp $";
36 
37  private static int getLargestSamplingFactor(int[] factors) {
38  int largest = 0;
39  for (int factor : factors) {
40  if (factor > largest) {
41  largest = factor;
42  }
43  }
44  return largest;
45  }
46 
47  private static final void writeMarkerAndLength(OutputStream out, int marker,int length) throws IOException {
48  out.write(0xff);
49  out.write(marker&0xff);
50  out.write((length>>>8)&0xff);
51  out.write(length&0xff);
52  }
53 
54  private static final void writeVariableLengthMarkerSegment(OutputStream out, int marker,int length,byte[] b) throws IOException {
55  writeMarkerAndLength(out,marker,length);
56  out.write(b,0,length-2);
57  }
58 
59  public static class DecompressedOutput {
60  private int nComponents;
61  private OutputArrayOrStream[] decompressedOutputPerComponent;
62  private File fileBasis;
63  private ByteOrder order;
64 
65  public DecompressedOutput() {
66  }
67 
68  /*
69  * @param fileBasis will be used literally if one component, with an appended suffix _n before the file extension (if any), where n is the component number from 0
70  */
71  public DecompressedOutput(File fileBasis,ByteOrder order) {
72  this.fileBasis = fileBasis;
73  this.order = order;
74  }
75 
76  public OutputArrayOrStream[] getDecompressedOutputPerComponent() { return decompressedOutputPerComponent; }
77 
78  public void configureDecompressedOutput(MarkerSegmentSOF sof) throws IOException {
79  nComponents = sof.getNComponentsInFrame();
80  decompressedOutputPerComponent = new OutputArrayOrStream[nComponents];
81  if (fileBasis == null) {
82  int length = sof.getNSamplesPerLine() * sof.getNLines();
83  for (int c=0; c<nComponents; ++c) {
84  decompressedOutputPerComponent[c] = new OutputArrayOrStream();
85  if (sof.getSamplePrecision() <= 8) {
86  decompressedOutputPerComponent[c].allocateByteArray(length);
87  }
88  else {
89  decompressedOutputPerComponent[c].allocateShortArray(length);
90  }
91  }
92  }
93  else {
94  if (nComponents == 1) {
95  decompressedOutputPerComponent[0] = new OutputArrayOrStream(new FileOutputStream(fileBasis),order);
96  }
97  else {
98  File parent = fileBasis.getParentFile(); // may be null
99  String baseFileName = fileBasis.getName();
100  String prefix;
101  String suffix;
102  int periodPosition = baseFileName.lastIndexOf('.');
103  if (periodPosition > -1) {
104  if (periodPosition > 0) {
105  prefix = baseFileName.substring(0,periodPosition); // copies from 0 to periodPosition-1
106  }
107  else {
108  prefix = "";
109  }
110  suffix = baseFileName.substring(periodPosition); // copies the period to the end
111  }
112  else {
113  prefix = baseFileName;
114  suffix = "";
115  }
116  for (int c=0; c<nComponents; ++c) {
117  String componentFileName = prefix + c + suffix;
118 //System.err.println("Parse.DecompressedOutput.configureDecompressedOutput(): componentFileName["+c+"] = "+componentFileName);
119  decompressedOutputPerComponent[c] = new OutputArrayOrStream(new FileOutputStream(new File(parent,componentFileName)),order); // OK if parent is null
120  }
121  }
122  }
123  }
124 
125  public void close() throws IOException {
126  for (int c=0; c<nComponents; ++c) {
127  decompressedOutputPerComponent[c].close();
128  }
129  }
130 
131  }
132 
133  public static class MarkerSegmentsFoundDuringParse {
134  private MarkerSegmentSOS sos;
135  private MarkerSegmentSOF sof;
136  private Map<String,HuffmanTable> htByClassAndIdentifer;
137  private Map<String,QuantizationTable> qtByIdentifer;
138 
139  public MarkerSegmentSOS getSOS() { return sos; }
140  public MarkerSegmentSOF getSOF() { return sof; }
141  public Map<String,HuffmanTable> getHuffmanTableByClassAndIdentifer() { return htByClassAndIdentifer; }
142  public Map<String,QuantizationTable> getQuantizationTableByIdentifer() { return qtByIdentifer; }
143 
144  public MarkerSegmentsFoundDuringParse(MarkerSegmentSOS sos,MarkerSegmentSOF sof,Map<String,HuffmanTable> htByClassAndIdentifer,Map<String,QuantizationTable> qtByIdentifer) {
145  this.sos = sos;
146  this.sof = sof;
147  this.htByClassAndIdentifer = htByClassAndIdentifer;
148  this.qtByIdentifer = qtByIdentifer;
149  }
150  }
151 
152  // follows pattern of dicom3tools appsrc/misc/jpegdump.cc
153 
169  public static MarkerSegmentsFoundDuringParse parse(InputStream in,OutputStream copiedRedactedOutputStream,Vector<Shape> redactionShapes,DecompressedOutput decompressedOutput) throws Exception, IOException {
170  boolean dumping = copiedRedactedOutputStream == null && decompressedOutput == null;
171  //boolean dumping = true;
172  boolean copying = copiedRedactedOutputStream != null;
173  boolean decompressing = decompressedOutput != null;
174 
175  EntropyCodedSegment ecs = null; // lazy instantiation of EntropyCodedSegment ... wait until we have relevant marker segments for its constructor
176 
177  ByteArrayOutputStream byteAccumulator = null; // recreated for first byte of each EntropyCodedSegment (at start and at each subsequent restart interval)
178 
179  MarkerSegmentSOS sos = null;
180  MarkerSegmentSOF sof = null;
181  Map<String,HuffmanTable> htByClassAndIdentifer = new HashMap<String,HuffmanTable>();
182  Map<String,QuantizationTable> qtByIdentifer = new HashMap<String,QuantizationTable>();
183  int restartinterval = 0;
184 
185  int mcuOffset = 0;
186  int nMCUHorizontally = 0;
187  int nMCUVertically = 0;
188  int mcuCountPerEntropyCodedSegment = 0;
189 
190  int offset=0;
191  int markerprefix = in.read();
192  while (true) {
193  int marker=0; // will be overwritten by what we read, unless we have premature EOF, in which case this will not be used
194  boolean sawEOF = false;
195  if (markerprefix == -1) {
196  if (dumping) System.err.print("End of file\n");
197  sawEOF=true;
198  }
199  else {
200  if (markerprefix != 0xff) { // byte of entropy-coded segment
201  if (byteAccumulator == null) {
202  if (dumping) System.err.print("Offset "+Utilities.toPaddedHexString(offset,4)+" Starting new Entropy Coded Segment\n");
203  byteAccumulator = new ByteArrayOutputStream();
204  }
205  byteAccumulator.write(markerprefix);
206  ++offset;
207  markerprefix=in.read();
208  continue;
209  }
210  marker=in.read();
211  if (marker == -1) {
212  if (dumping) System.err.print("End of file immediately after marker flag 0xff ... presumably was padding\n");
213  sawEOF=true;
214  }
215  else if (marker == 0xff) { // 0xff byte of padding
216  if (dumping) System.err.print("Offset "+Utilities.toPaddedHexString(offset,4)+" Fill byte 0xff\n");
217  ++offset;
218  markerprefix=marker; // the first 0xff is padding, the 2nd may be the start of a marker
219  continue;
220  }
221  // ignore doing_jpeg2k_tilepart for now :(
222  else if (marker == 0) { // 0xff byte of entropy-coded segment ... ignore following zero byte
223  if (dumping) System.err.print("Offset "+Utilities.toPaddedHexString(offset,4)+" Encoded 0xff in entropy-coded segment followed by stuffed zero byte\n");
224  if (byteAccumulator == null) {
225  if (dumping) System.err.print("Offset "+Utilities.toPaddedHexString(offset,4)+" Starting new Entropy Coded Segment\n");
226  byteAccumulator = new ByteArrayOutputStream();
227  }
228  byteAccumulator.write(markerprefix);
229  markerprefix=in.read();
230  offset+=2;
231  continue;
232  }
233  // ignore doing_jpegls and zero stuffed bit instead of byte for now :(
234  }
235 
236  // Definitely have a marker or EOF ...
237 
238  if (byteAccumulator != null) {
239  // process any Entropy Coded Segment bytes accumulated so far ...
240  if (ecs == null) {
241  // need to figure out the sampling factors if this is the first Entropy Coded Segment, so that EntropyCodedSegment.finish() knows how many to process and where it is at
242 
243  if (sof == null) {
244  throw new Exception("Error - compressed data without preceding SOF marker segment");
245  }
246 
247  int blockSize = Markers.isDCT(sof.getMarker()) ? 8 : 1;
248 
249  int horizontalSamplesPerMCU = blockSize * getLargestSamplingFactor(sof.getHorizontalSamplingFactor());
250 //System.err.println("horizontalSamplesPerMCU "+horizontalSamplesPerMCU);
251  nMCUHorizontally = (sof.getNSamplesPerLine()-1)/horizontalSamplesPerMCU + 1;
252 //System.err.println("nMCUHorizontally "+nMCUHorizontally);
253 
254  int verticalSamplesPerMCU = blockSize * getLargestSamplingFactor(sof.getVerticalSamplingFactor());
255 //System.err.println("verticalSamplesPerMCU "+verticalSamplesPerMCU);
256  nMCUVertically = (sof.getNLines()-1)/verticalSamplesPerMCU + 1; // may need to update this from DNL marker :(
257 //System.err.println("nMCUVertically "+nMCUVertically);
258 
259 //System.err.println("restartinterval "+restartinterval);
260  mcuCountPerEntropyCodedSegment = (restartinterval == 0) ? nMCUHorizontally * nMCUVertically : restartinterval;
261 //System.err.println("mcuCountPerEntropyCodedSegment "+mcuCountPerEntropyCodedSegment);
262  mcuOffset = 0;
263 
264  ecs = new EntropyCodedSegment(sos,sof,htByClassAndIdentifer,qtByIdentifer,nMCUHorizontally,redactionShapes,copying,dumping,decompressing,decompressedOutput);
265  }
266  byte[] bytesToDecompress = byteAccumulator.toByteArray();
267 //System.err.println("bytesToDecompress length "+bytesToDecompress.length);
268 //System.err.println("mcuOffset "+mcuOffset);
269  int mcuStillNeeded = (nMCUHorizontally * nMCUVertically) - mcuOffset;
270 //System.err.println("mcuStillNeeded "+mcuStillNeeded);
271  int mcuNeededThisInterval = mcuCountPerEntropyCodedSegment > mcuStillNeeded ? mcuStillNeeded : mcuCountPerEntropyCodedSegment; // Do NOT attempt to read beyond what is needed
272 //System.err.println("mcuNeededThisInterval "+mcuNeededThisInterval);
273  byte[] bytesToCopy = ecs.finish(bytesToDecompress,mcuNeededThisInterval,mcuOffset);
274  if (copying) {
275  copiedRedactedOutputStream.write(bytesToCopy); // NB. EntropyCodedSegment.finish() has already done the zero byte stuffing after 0xff values
276  }
277  byteAccumulator = null;
278  mcuOffset += mcuCountPerEntropyCodedSegment;
279  }
280 
281  if (sawEOF) {
282  // would have stopped already if we saw an EOI, so can assume it was missing
283  if (copying) {
284 //System.err.println("inserting missing EOI because premature EOF");
285  copiedRedactedOutputStream.write(0xff); copiedRedactedOutputStream.write(Markers.EOI);
286  }
287  break;
288  }
289 
290  marker|=0xff00; // convention is to express them with the leading ff, so that is what we look up
291 
292  if (dumping) System.err.print("Offset "+Utilities.toPaddedHexString(offset,4)+" Marker "+Utilities.toPaddedHexString(marker,4)+" "+Markers.getAbbreviation(marker)+" "+Markers.getDescription(marker)+" ");
293 
294  offset+=2; // wait till after we have printed it to increment it
295 
296  if (Markers.isVariableLengthJPEGSegment(marker)) {
297  int length=Utilities.read16be(in);
298  if (length == -1) {
299  throw new Exception("Error - variable length marker without length at Offset "+Utilities.toPaddedHexString(offset,4));
300  }
301  else {
302  offset+=2;
303  if (dumping) System.err.print("length variable "+Utilities.toPaddedHexString(length,2)+" ");
304  }
305 
306  if (length > 2) {
307  byte[] b = new byte[length-2];
308  int count = in.read(b,0,length-2);
309  if (count != length-2) {
310  throw new Exception("Error - couldn't read variable length parameter sequence at Offset "+Utilities.toPaddedHexString(offset,4));
311  }
312  else {
313  switch (marker) {
314  case Markers.SOS:
315  sos = new MarkerSegmentSOS(b,length-2);
316  if (dumping) System.err.print(sos);
317  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
318  break;
319  case Markers.SOF0:
320  case Markers.SOF1:
321  case Markers.SOF2:
322  case Markers.SOF3:
323  case Markers.SOF5:
324  case Markers.SOF6:
325  case Markers.SOF7:
326  case Markers.SOF9:
327  case Markers.SOFA:
328  case Markers.SOFB:
329  case Markers.SOFD:
330  case Markers.SOFE:
331  case Markers.SOFF:
332  case Markers.SOF55:
333  sof = new MarkerSegmentSOF(marker,b,length-2);
334  if (dumping) System.err.print(sof);
335  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
336  if (decompressing) decompressedOutput.configureDecompressedOutput(sof);
337  break;
338  case Markers.DHT:
339  MarkerSegmentDHT dht = new MarkerSegmentDHT(b,length-2);
340  dht.addToMapByClassAndIdentifier(htByClassAndIdentifer); // hokey, but sometimes multiple tables in one segment, sometimes multiple segments
341  if (dumping) System.err.print(dht);
342  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
343  break;
344  case Markers.DQT:
345  MarkerSegmentDQT dqt = new MarkerSegmentDQT(b,length-2);
346  dqt.addToMapByIdentifier(qtByIdentifer); // hokey, but sometimes multiple tables in one segment, sometimes multiple segments
347  if (dumping) System.err.print(dqt);
348  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
349  break;
350  //case Markers.LSE
351  // break;
352  case Markers.DRI:
353  if (length == 4) {
354  restartinterval = Utilities.extract16be(b,0);
355  }
356  else if (length == 5) {
357  restartinterval = (int)Utilities.extract24be(b,0);
358  }
359  else if (length == 6) {
360  restartinterval = (int)Utilities.extract32be(b,0);
361  }
362  else {
363  throw new Exception("Illegal length "+length+" of restart interval at Offset "+Utilities.toPaddedHexString(offset,4));
364  }
365  if (dumping) System.err.print("\n\tDRI - Define Restart Interval = "+Utilities.toPaddedHexString(restartinterval,4)+"\n");
366  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
367  break;
368  case Markers.DNL:
369  long numberoflines;
370  if (length == 4) {
371  numberoflines = Utilities.extract16be(b,0);
372  }
373  else if (length == 5) {
374  numberoflines = Utilities.extract24be(b,0);
375  }
376  else if (length == 6) {
377  numberoflines = Utilities.extract32be(b,0);
378  }
379  else {
380  throw new Exception("Illegal length "+length+" of number of lines at Offset "+Utilities.toPaddedHexString(offset,4));
381  }
382  if (dumping) System.err.print("\n\tDNL - Define Number of Lines = "+Utilities.toPaddedHexString(numberoflines,4)+"\n");
383  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
384  break;
385  //case Markers.COD:
386  // break;
387  //case Markers.COM:
388  // // do NOT copy COM marker segments ... may leak identity
389  // break;
390  case Markers.APP0:
391  case Markers.APP1:
392  case Markers.APP2:
393  String magic = "";
394  {
395  StringBuffer magicbuf = new StringBuffer();
396  for (int i=0; i<b.length && b[i] != 0; ++i) {
397  magicbuf.append(Character.valueOf((char)b[i]));
398  }
399  magic = magicbuf.toString();
400  }
401  if (dumping) System.err.print(magic);
402  if (marker == Markers.APP0 && magic.equals("JFIF")) {
403  if (dumping) System.err.print(new MarkerSegmentAPP0JFIF(b,length-2));
404  //if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
405  }
406  // may want to consider not copying unrecognized APPn segments ... may leak identity ... copy everything for now :(
407  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
408  break;
409  default:
410  // may want to consider not copying unrecognized segments ... may leak identity ... copy everything for now :(
411  if (copying) writeVariableLengthMarkerSegment(copiedRedactedOutputStream,marker,length,b);
412  break;
413  }
414  }
415  }
416  else {
417  if (dumping) System.err.print("Warning - variable length marker without \"zero\" length (really 2)");
418  }
419  offset+=(length-2);
420  }
421  else if (Markers.isNoLengthJPEGSegment(marker)) {
422  if (copying) { copiedRedactedOutputStream.write(0xff); copiedRedactedOutputStream.write(marker&0xff);}
423  if (marker == Markers.EOI) {
424  // stop rather than process padding to end of file, so as not to create spurious empty EntropyCodedSegment
425  if (dumping) System.err.print("\n");
426  break;
427  }
428  }
429  else {
430  int length=Markers.isFixedLengthJPEGSegment(marker);
431  switch (length) {
432  case 0:
433  break;
434  case 3:
435  {
436  int value = in.read();
437  if (value != -1) {
438  offset+=1;
439  if (dumping) System.err.print("length fixed 3 value "+Utilities.toPaddedHexString(value,2)+" ");
440  if (copying) { writeMarkerAndLength(copiedRedactedOutputStream,marker,length); copiedRedactedOutputStream.write(value&0xff); }
441  }
442  else {
443  throw new Exception("Error - fixed length 3 marker without value at Offset "+Utilities.toPaddedHexString(offset,4));
444  }
445  }
446  break;
447  case 4:
448  {
449  int value=Utilities.read16be(in);
450  if (value != -1) {
451  offset+=2;
452  if (dumping) System.err.print("length fixed 4 value "+Utilities.toPaddedHexString(value,2)+" ");
453  if (copying) { writeMarkerAndLength(copiedRedactedOutputStream,marker,length); copiedRedactedOutputStream.write((value>>>8)&0xff); copiedRedactedOutputStream.write(value&0xff); }
454  }
455  else {
456  throw new Exception("Error - fixed length 4 marker without value at Offset "+Utilities.toPaddedHexString(offset,4));
457  }
458  }
459  break;
460  default:
461  throw new Exception("Error - fixed length marker with unexpected length "+length+" at Offset "+Utilities.toPaddedHexString(offset,4));
462  //break;
463  }
464  }
465 
466  if (dumping) System.err.print("\n");
467  markerprefix=in.read();
468  }
469 
470  if (copying) {
471  copiedRedactedOutputStream.close();
472  }
473  if (decompressing) {
474  decompressedOutput.close();
475  }
476  return new MarkerSegmentsFoundDuringParse(sos,sof,htByClassAndIdentifer,qtByIdentifer);
477  }
478 
491  public static MarkerSegmentsFoundDuringParse parse(InputStream in,OutputStream copiedRedactedOutputStream,Vector<Shape> redactionShapes) throws Exception, IOException {
492  return parse(in,copiedRedactedOutputStream,redactionShapes,null/*decompressedOutput*/);
493  }
503  public static void main(String arg[]) {
504  try {
505  InputStream in = new FileInputStream(arg[0]);
506  OutputStream copiedCompressedOutput = arg.length > 1 && arg[1].length() > 0 ? new FileOutputStream(arg[1]) : null;
507  DecompressedOutput decompressedOutput = arg.length > 2 && arg[2].length() > 0 ? new DecompressedOutput(new File(arg[2]),ByteOrder.BIG_ENDIAN) : null;
508  long startTime = System.currentTimeMillis();
509  parse(in,copiedCompressedOutput,null,decompressedOutput);
510  long currentTime = System.currentTimeMillis();
511  long runTime = currentTime-startTime;
512 System.err.println("Took = "+runTime+" ms");
513  }
514  catch (Exception e) {
515  e.printStackTrace(System.err);
516  }
517  }
518 
519 }
520 
static final int extract16be(byte[] b, int offset)
Definition: Utilities.java:46
MarkerSegmentsFoundDuringParse(MarkerSegmentSOS sos, MarkerSegmentSOF sof, Map< String, HuffmanTable > htByClassAndIdentifer, Map< String, QuantizationTable > qtByIdentifer)
Definition: Parse.java:144
void addToMapByIdentifier(Map< String, QuantizationTable > qtByIdentifer)
static void main(String arg[])
Definition: Parse.java:503
static final boolean isDCT(int marker)
Definition: Markers.java:228
static final int isFixedLengthJPEGSegment(int marker)
Definition: Markers.java:131
static final boolean isVariableLengthJPEGSegment(int marker)
Definition: Markers.java:183
static final String getDescription(int marker)
Definition: Markers.java:384
static MarkerSegmentsFoundDuringParse parse(InputStream in, OutputStream copiedRedactedOutputStream, Vector< Shape > redactionShapes)
Definition: Parse.java:491
void configureDecompressedOutput(MarkerSegmentSOF sof)
Definition: Parse.java:78
Map< String, QuantizationTable > getQuantizationTableByIdentifer()
Definition: Parse.java:142
void addToMapByClassAndIdentifier(Map< String, HuffmanTable > htByClassAndIdentifer)
OutputArrayOrStream [] getDecompressedOutputPerComponent()
Definition: Parse.java:76
static MarkerSegmentsFoundDuringParse parse(InputStream in, OutputStream copiedRedactedOutputStream, Vector< Shape > redactionShapes, DecompressedOutput decompressedOutput)
Definition: Parse.java:169
static final boolean isNoLengthJPEGSegment(int marker)
Definition: Markers.java:142
Map< String, HuffmanTable > getHuffmanTableByClassAndIdentifer()
Definition: Parse.java:141
DecompressedOutput(File fileBasis, ByteOrder order)
Definition: Parse.java:71
static final long extract24be(byte[] b, int offset)
Definition: Utilities.java:50
static String toPaddedHexString(int i, int length)
Definition: Utilities.java:18
static final long extract32be(byte[] b, int offset)
Definition: Utilities.java:54
static final String getAbbreviation(int marker)
Definition: Markers.java:379
final byte [] finish(byte[] bytesToDecompress, int mcuCount, int mcuOffset)
static final int read16be(InputStream in)
Definition: Utilities.java:58