View Javadoc
1   package org.csveed.token;
2   
3   import static org.csveed.token.EncounteredSymbol.END_OF_FILE_SYMBOL;
4   import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL;
5   import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL_TRASH;
6   import static org.csveed.token.EncounteredSymbol.ESCAPE_SYMBOL;
7   import static org.csveed.token.EncounteredSymbol.OTHER_SYMBOL;
8   import static org.csveed.token.EncounteredSymbol.QUOTE_SYMBOL;
9   
10  import java.util.Map;
11  import java.util.TreeMap;
12  
13  import org.csveed.report.CsvException;
14  import org.csveed.report.GeneralError;
15  import org.slf4j.Logger;
16  import org.slf4j.LoggerFactory;
17  
18  public class SymbolMapping {
19  
20      private static final Logger LOG = LoggerFactory.getLogger(SymbolMapping.class);
21  
22      private Map<EncounteredSymbol, char[]> symbolToChars = new TreeMap<>();
23      private Map<Character, EncounteredSymbol> charToSymbol = new TreeMap<>();
24  
25      private Character escapeCharacter;
26  
27      private Character quoteCharacter;
28  
29      private boolean settingsLogged;
30  
31      private int startLine = 1;
32  
33      private boolean skipCommentLines = true;
34  
35      // When multiple EOL characters have been given,
36      // only the first one encountered will be accepted.
37      private char acceptedEndOfLine;
38  
39      public SymbolMapping() {
40          initDefaultMapping();
41      }
42  
43      public void initDefaultMapping() {
44          addMapping(EncounteredSymbol.ESCAPE_SYMBOL, '"');
45          addMapping(EncounteredSymbol.QUOTE_SYMBOL, '"');
46          addMapping(EncounteredSymbol.SEPARATOR_SYMBOL, ';');
47          addMapping(EncounteredSymbol.EOL_SYMBOL, new char[] { '\r', '\n' } );
48          addMapping(EncounteredSymbol.SPACE_SYMBOL, ' ');
49          addMapping(EncounteredSymbol.COMMENT_SYMBOL, '#');
50      }
51  
52      public char getFirstMappedCharacter(EncounteredSymbol encounteredSymbol) {
53          char[] mappedCharacters = getMappedCharacters(encounteredSymbol);
54          return mappedCharacters == null ? 0 : mappedCharacters[0];
55      }
56  
57      public char[] getMappedCharacters(EncounteredSymbol encounteredSymbol) {
58          return symbolToChars.get(encounteredSymbol);
59      }
60  
61      public void addMapping(EncounteredSymbol symbol, Character character) {
62          addMapping(symbol, new char[] { character } );
63          if (symbol.isCheckForSimilarEscapeAndQuote()) {
64              storeCharacterForLaterComparison(symbol, character);
65          }
66      }
67  
68      public void addMapping(EncounteredSymbol symbol, char[] characters) {
69          while (charToSymbol.values().remove(symbol));
70          for (Character character : characters) {
71              charToSymbol.put(character, symbol);
72          }
73          symbolToChars.put(symbol, characters);
74      }
75  
76      public void logSettings() {
77          if (settingsLogged) {
78              return;
79          }
80          LOG.info("- CSV config / skip comment lines? {}", isSkipCommentLines() ? "yes" : "no");
81          LOG.info("- CSV config / start line: {}", startLine);
82          for (EncounteredSymbol symbol : symbolToChars.keySet()) {
83              char[] characters = symbolToChars.get(symbol);
84              LOG.info("- CSV config / Characters for {} {}", symbol, charactersToString(characters));
85          }
86          settingsLogged = true;
87      }
88  
89      private String charactersToString(char[] characters) {
90          StringBuilder returnString = new StringBuilder();
91          for (char currentChar : characters) {
92              returnString.append(charToPrintable(currentChar));
93              returnString.append(" ");
94          }
95          return returnString.toString();
96      }
97  
98      private String charToPrintable(char character) {
99          switch(character) {
100             case '\t' : return "\\t";
101             case '\n' : return "\\n";
102             case '\r' : return "\\r";
103             default: return Character.toString(character);
104         }
105     }
106 
107     private void storeCharacterForLaterComparison(EncounteredSymbol symbol, Character character) {
108         if (symbol == ESCAPE_SYMBOL) {
109             escapeCharacter = character;
110         } else if (symbol == QUOTE_SYMBOL) {
111             quoteCharacter = character;
112         }
113     }
114 
115     public boolean isSameCharactersForEscapeAndQuote() {
116         return escapeCharacter != null && quoteCharacter != null && escapeCharacter.equals(quoteCharacter);
117     }
118 
119     public EncounteredSymbol find(int character, ParseState parseState) {
120         if (character == -1) {
121             return END_OF_FILE_SYMBOL;
122         }
123         EncounteredSymbol symbol = charToSymbol.get((char)character);
124         if (symbol == null) {
125             return OTHER_SYMBOL;
126         }
127         if (symbol == EOL_SYMBOL) {
128             if (acceptedEndOfLine == 0) {
129                 LOG.info("- Triggering EOL character: {}", character);
130                 acceptedEndOfLine = (char)character;
131             }
132             if (acceptedEndOfLine != character) {
133                 symbol = EOL_SYMBOL_TRASH;
134             }
135         }
136         if (symbol.isCheckForSimilarEscapeAndQuote() && isSameCharactersForEscapeAndQuote()) {
137             return parseState.isUpgradeQuoteToEscape() ? ESCAPE_SYMBOL : QUOTE_SYMBOL;
138         }
139         return symbol;
140     }
141 
142     public int getStartLine() {
143         return startLine;
144     }
145 
146     public void setStartLine(int startLine) {
147         if (startLine == 0) {
148             throw new CsvException(new GeneralError("Row cannot be set at 0. Rows are 1-based"));
149         }
150         this.startLine = startLine;
151     }
152 
153     public boolean isSkipCommentLines() {
154         return skipCommentLines;
155     }
156 
157     public void setSkipCommentLines(boolean skipCommentLines) {
158         this.skipCommentLines = skipCommentLines;
159     }
160 }