1 files changed, 189 insertions, 102 deletions
diff --git a/app/src/main/java/github/daneren2005/dsub/util/tags/ID3v2File.java b/app/src/main/java/github/daneren2005/dsub/util/tags/ID3v2File.java
index 69668475..4fb7418d 100644
--- a/app/src/main/java/github/daneren2005/dsub/util/tags/ID3v2File.java
+++ b/app/src/main/java/github/daneren2005/dsub/util/tags/ID3v2File.java
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2013 Adrian Ulrich <adrian@blinkenlights.ch>
+ * Copyright (C) 2013-2016 Adrian Ulrich <adrian@blinkenlights.ch>
+ * Copyright (C) 2017-2018 Google Inc.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -12,7 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 package github.daneren2005.dsub.util.tags;
@@ -21,160 +22,246 @@ import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.Locale;
+import java.util.Enumeration;
+
 
 
 public class ID3v2File extends Common {
-	private static int ID3_ENC_LATIN   = 0x00;
-	private static int ID3_ENC_UTF16LE = 0x01;
-	private static int ID3_ENC_UTF16BE = 0x02;
-	private static int ID3_ENC_UTF8    = 0x03;
-	
+	private static final int ID3_ENC_LATIN   = 0x00;
+	private static final int ID3_ENC_UTF16   = 0x01;
+	private static final int ID3_ENC_UTF16BE = 0x02;
+	private static final int ID3_ENC_UTF8    = 0x03;
+	private static final HashMap<String, String> sOggNames;
+	static {
+		// ID3v2.3 -> ogg mapping
+		sOggNames = new HashMap<String, String>();
+		sOggNames.put("TIT2", "TITLE");
+		sOggNames.put("TALB", "ALBUM");
+		sOggNames.put("TPE1", "ARTIST");
+		sOggNames.put("TPE2", "ALBUMARTIST");
+		sOggNames.put("TYER", "YEAR");
+		sOggNames.put("TPOS", "DISCNUMBER");
+		sOggNames.put("TRCK", "TRACKNUMBER");
+		sOggNames.put("TCON", "GENRE");
+		sOggNames.put("TCOM", "COMPOSER");
+		// ID3v2.2 3-character names
+		sOggNames.put("TT2", "TITLE");
+		sOggNames.put("TAL", "ALBUM");
+		sOggNames.put("TP1", "ARTIST");
+		sOggNames.put("TP2", "ALBUMARTIST");
+		sOggNames.put("TYE", "YEAR");
+		sOggNames.put("TRK", "TRACKNUMBER");
+		sOggNames.put("TCO", "GENRE");
+		sOggNames.put("TCM", "COMPOSER");
+	}
+
+	// Holds a key-value pair
+	private class TagItem {
+		String key;
+		String value;
+		public TagItem(String key, String value) {
+			this.key = key;
+			this.value = value;
+		}
+	}
+
 	public ID3v2File() {
 	}
-	
+
 	public HashMap getTags(RandomAccessFile s) throws IOException {
 		HashMap tags = new HashMap();
-		
+
 		final int v2hdr_len = 10;
 		byte[] v2hdr = new byte[v2hdr_len];
-		
+
 		// read the whole 10 byte header into memory
 		s.seek(0);
 		s.read(v2hdr);
-		
-		int id3v   = ((b2be32(v2hdr,0))) & 0xFF;   // swapped ID3\04 -> ver. ist the first byte
-		int v3len  = ((b2be32(v2hdr,6)));          // total size EXCLUDING the this 10 byte header
-		v3len      = ((v3len & 0x7f000000) >> 3) | // for some funky reason, this is encoded as 7*4 bits
-		             ((v3len & 0x007f0000) >> 2) |
-		             ((v3len & 0x00007f00) >> 1) |
-		             ((v3len & 0x0000007f) >> 0) ;
-		
-		// debug(">> tag version ID3v2."+id3v);
-		// debug(">> LEN= "+v3len+" // "+v3len);
-		
+
+		int v3major = (b2be32(v2hdr, 0)) & 0xFF;   // swapped ID3\04 -> ver. is the first byte
+		int v3minor = (b2be32(v2hdr, 1)) & 0xFF;   // minor version, not used by us.
+		int v3flags = (b2be32(v2hdr, 2)) & 0xFF;   // flags such as extended headers.
+		int v3len   = (b2be32(v2hdr, 6));          // total size EXCLUDING this 10 byte header
+		v3len       = unsyncsafe(v3len);
+
+		// In 2.4, bit #6 indicates whether or not this file has an extended header
+		boolean flag_ext_hdr = v3major >= 4 && (v3flags & (1 << 6)) != 0;
+
+		if (flag_ext_hdr) {
+			// The extended header is at least 6 bytes:
+			// * 4 bytes of size
+			// * 1 byte numflags
+			// * 1 byte extended flags
+			byte[] exthdr = new byte[6];
+			long pos = s.getFilePointer();
+			s.read(exthdr);
+
+			// ID3v2.4 stores the extended header size as a syncsafe integer: decode it before seeking to the header end.
+			int extlen = unsyncsafe(b2be32(exthdr, 0));
+			s.seek(pos + extlen);
+		}
+
 		// we should already be at the first frame
 		// so we can start the parsing right now
-		tags = parse_v3_frames(s, v3len);
+		tags = parse_v3_frames(s, v3len, v3major);
 		tags.put("_hdrlen", v3len+v2hdr_len);
 		return tags;
 	}
-	
+
+	/*
+	**  converts a 28-bit syncsafe integer (7 payload bits in each of its 4 bytes) to a plain Java integer
+	*/
+	private int unsyncsafe(int x) {
+		x     = ((x & 0x7f000000) >> 3) |
+				((x & 0x007f0000) >> 2) |
+				((x & 0x00007f00) >> 1) |
+				((x & 0x0000007f) >> 0) ;
+		return x;
+	}
+
+	/**
+	 * Calculates the frame length from the size field at {@code offset}, decoded according to the ID3v2 major version.
+	 */
+	private int calculateFrameLength(byte[] frame, int offset, int v3major) {
+		// ID3v2 (aka ID3v2.2) had a 3-byte unencoded length field; mask each byte because Java bytes are signed.
+		if (v3major < 3) {
+			return ((frame[offset] & 0xFF) << 16) | ((frame[offset+1] & 0xFF) << 8) | (frame[offset+2] & 0xFF);
+		}
+		int rawlen = b2be32(frame, offset);
+		// Encoders prior to ID3v2.4 did not syncsafe-encode the frame length
+		if (v3major < 4) {
+			return rawlen;
+		}
+		return unsyncsafe(rawlen);
+	}
+
 	/* Parses all ID3v2 frames at the current position up until payload_len
 	** bytes were read
 	*/
-	public HashMap parse_v3_frames(RandomAccessFile s, long payload_len) throws IOException {
+	public HashMap parse_v3_frames(RandomAccessFile s, long payload_len, int v3major) throws IOException {
 		HashMap tags = new HashMap();
-		byte[] frame   = new byte[10]; // a frame header is always 10 bytes
-		long bread     = 0;            // total amount of read bytes
-		
+		// ID3v2 (aka ID3v2.2) had a 6-byte header of a 3-byte name and a 3-byte length.
+		// ID3v2.3 increased the header size to 10 bytes, with a 4-byte name and a 4-byte length
+		int namelen = (v3major >= 3 ? 4 : 3);
+		int headerlen = (v3major >= 3 ? 10 : 6);
+		byte[] frame   = new byte[headerlen];
+		long bread     = 0;                      // total amount of read bytes
+
 		while(bread < payload_len) {
 			bread += s.read(frame);
-			String framename = new String(frame, 0, 4);
-			int slen = b2be32(frame, 4);
-			
+			String framename = new String(frame, 0, namelen);
+			int slen = calculateFrameLength(frame, namelen, v3major);
 			/* Abort on silly sizes */
-			if(slen < 1 || slen > 524288)
+			long bytesRemaining = payload_len - bread;
+			if(slen < 1 || slen > bytesRemaining)
 				break;
-			
+
 			byte[] xpl = new byte[slen];
 			bread += s.read(xpl);
 
 			if(framename.substring(0,1).equals("T")) {
-				String[] nmzInfo = normalizeTaginfo(framename, xpl);
-
-				for(int i = 0; i < nmzInfo.length; i += 2) {
-					String oggKey = nmzInfo[i];
-					String decPld = nmzInfo[i + 1];
-
-					if (oggKey.length() > 0 && !tags.containsKey(oggKey)) {
-						addTagEntry(tags, oggKey, decPld);
+				TagItem nti = normalizeTaginfo(framename, xpl);
+				if (nti.key.length() > 0) {
+					for (TagItem ti : splitTagPayload(nti)) {
+						addTagEntry(tags, ti.key, ti.value);
 					}
 				}
 			}
 			else if(framename.equals("RVA2")) {
 				//
 			}
-			
+
 		}
 		return tags;
 	}
-	
+
+	/* Splits a NUL-separated tag value into one TagItem per non-empty piece; anything not split is returned as a single element */
+	private ArrayList<TagItem> splitTagPayload(TagItem in) {
+		ArrayList<TagItem> res = new ArrayList<TagItem>();
+		int i = 0; // number of pieces produced by the split, empty ones included
+
+		if (sOggNames.containsValue(in.key)) {
+			// Only try to split if there are at least two chars and the string does NOT look UTF16 encoded.
+			if (in.value.length() >= 2 && in.value.charAt(0) != 0 && in.value.charAt(1) != 0) {
+				for (String item : in.value.split("\0")) {
+					if (item.length() > 0) { // do not add empty items, avoids thrashing if the string is zero padded.
+						res.add(new TagItem(in.key, item));
+					}
+					i++;
+				}
+			}
+		}
+
+		if (i == 0) { // no split was attempted: pass the item through unchanged
+			res.add(in);
+		}
+		return res;
+	}
+
 	/* Converts ID3v2 sillyframes to OggNames */
-	private String[] normalizeTaginfo(String k, byte[] v) {
-		String[] rv = new String[] {"",""};
-		HashMap lu = new HashMap<String, String>();
-		lu.put("TIT2", "TITLE");
-		lu.put("TALB", "ALBUM");
-		lu.put("TPE1", "ARTIST");
-		
-		if(lu.containsKey(k)) {
+	private TagItem normalizeTaginfo(String k, byte[] v) {
+		TagItem ti = new TagItem("", "");
+		if(sOggNames.containsKey(k)) {
 			/* A normal, known key: translate into Ogg-Frame name */
-			rv[0] = (String)lu.get(k);
-			rv[1] = getDecodedString(v);
+			ti.key = (String)sOggNames.get(k);
+			ti.value = getDecodedString(v);
 		}
 		else if(k.equals("TXXX")) {
 			/* A freestyle field, ieks! */
 			String txData[] = getDecodedString(v).split(Character.toString('\0'), 2);
 			/* Check if we got replaygain info in key\0value style */
-			if(txData.length == 2) {
-				if(txData[0].matches("^(?i)REPLAYGAIN_(ALBUM|TRACK)_GAIN$")) {
-					rv[0] = txData[0].toUpperCase(); /* some tagwriters use lowercase for this */
-					rv[1] = txData[1];
-				} else {
-					// Check for replaygain tags just thrown randomly in field
-					int nextStartIndex = 1;
-					int startName = txData[1].toLowerCase(Locale.US).indexOf("replaygain_");
-					ArrayList<String> parts = new ArrayList<String>();
-					while(startName != -1) {
-						int endName = txData[1].indexOf((char) 0, startName);
-						if(endName != -1) {
-							parts.add(txData[1].substring(startName, endName).toUpperCase());
-							int endValue = txData[1].indexOf((char) 0, endName + 1);
-							if(endValue != -1) {
-								parts.add(txData[1].substring(endName + 1, endValue));
-								nextStartIndex = endValue + 1;
-							} else {
-								break;
-							}
-						} else {
-							break;
-						}
-
-						startName = txData[1].toLowerCase(Locale.US).indexOf("replaygain_", nextStartIndex);
-					}
-
-					if(parts.size() > 0) {
-						rv = new String[parts.size()];
-						rv = parts.toArray(rv);
-					}
-				}
+			if(txData.length == 2 && txData[0].matches("^(?i)REPLAYGAIN_(ALBUM|TRACK)_GAIN$")) {
+				ti.key = txData[0].toUpperCase(); /* some tagwriters use lowercase for this */
+				ti.value = txData[1];
 			}
 		}
-		
-		return rv;
+
+		return ti;
 	}
-	
+
 	/* Converts a raw byte-stream text into a java String */
 	private String getDecodedString(byte[] raw) {
 		int encid = raw[0] & 0xFF;
-		int len   = raw.length;
-		String v  = "";
+		int skip  = 1;
+		String cs = "ISO-8859-1";
+		String rv  = "";
 		try {
-			if(encid == ID3_ENC_LATIN) {
-				v = new String(raw, 1, len-1, "ISO-8859-1");
-			}
-			else if (encid == ID3_ENC_UTF8) {
-				v = new String(raw, 1, len-1, "UTF-8");
-			}
-			else if (encid == ID3_ENC_UTF16LE) {
-				v = new String(raw, 3, len-3, "UTF-16LE");
+			switch (encid) {
+				case ID3_ENC_UTF8:
+					cs = "UTF-8";
+					break;
+				case ID3_ENC_UTF16BE:
+					cs = "UTF-16BE";
+					skip = 3;
+					break;
+				case ID3_ENC_UTF16:
+					cs = "UTF-16";
+					if (raw.length > 4) {
+						if ((raw[1]&0xFF) == 0xFE && (raw[2]&0XFF) == 0xFF && (raw[3]&0xFF) == 0x00 && (raw[4]&0xFF) == 0x00) {
+							// buggy tag written by lame?!
+							raw[3] = raw[2];
+							raw[4] = raw[1];
+							skip = 3;
+						} else if((raw[1]&0xFF) == 0xFF && (raw[2]&0XFF) == 0x00 && (raw[3]&0xFF) == 0xFE) {
+							// ?!, but seen in the wild
+							raw[2] = raw[1];
+							skip = 2;
+						}
+					}
+					break;
+				case ID3_ENC_LATIN:
+				default:
+					// uses defaults
 			}
-			else if (encid == ID3_ENC_UTF16BE) {
-				v = new String(raw, 3, len-3, "UTF-16BE");
+
+			rv = new String(raw, skip, raw.length-skip, cs);
+
+			if (rv.length() > 0 && rv.substring(rv.length()-1).equals("\0")) {
+				// SOME tag writers seem to null terminate strings, some don't...
+				rv = rv.substring(0, rv.length()-1);
 			}
 		} catch(Exception e) {}
-		return v;
+		return rv;
 	}
-	
+
 }