Coverage Report - org.archive.io.hbase.Keying
 
Classes in this File Line Coverage Branch Coverage Complexity
Keying
0%
0/30
0%
0/32
7
 
 1  
 package org.archive.io.hbase;
 2  
 
 3  
 /**
 4  
  * Copyright 2010 The Apache Software Foundation
 5  
  *
 6  
  * Licensed to the Apache Software Foundation (ASF) under one
 7  
  * or more contributor license agreements.  See the NOTICE file
 8  
  * distributed with this work for additional information
 9  
  * regarding copyright ownership.  The ASF licenses this file
 10  
  * to you under the Apache License, Version 2.0 (the
 11  
  * "License"); you may not use this file except in compliance
 12  
  * with the License.  You may obtain a copy of the License at
 13  
  *
 14  
  *     http://www.apache.org/licenses/LICENSE-2.0
 15  
  *
 16  
  * Unless required by applicable law or agreed to in writing, software
 17  
  * distributed under the License is distributed on an "AS IS" BASIS,
 18  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 19  
  * See the License for the specific language governing permissions and
 20  
  * limitations under the License.
 21  
  */
 22  
 
 23  
 import java.util.StringTokenizer;
 24  
 import java.util.regex.Matcher;
 25  
 import java.util.regex.Pattern;
 26  
 
 27  
 /**
 28  
  * Utility creating hbase friendly keys. Use fabricating row names or column
 29  
  * qualifiers.
 30  
  * <p>
 31  
  * TODO: Add createSchemeless key, a key that doesn't care if scheme is http or
 32  
  * https.
 33  
  * 
 34  
  * @see Bytes#split(byte[], byte[], int)
 35  
  */
 36  0
 public class Keying {
 37  
         public static final String REFERER_URL_SCHEME = "r:";
 38  
 
 39  0
         private static final Pattern URI_RE_PARSER = Pattern.compile("^([^:/?#]+://(?:[^/?#@]+@)?)([^:/?#]+)(.*)$");
 40  
 
 41  
         public static final String DOMAIN_NAME_DELIMITER = ".";
 42  
 
 43  
         /**
 44  
          * Makes a key out of passed URI for use as row name or column qualifier.
 45  
          * 
 46  
          * This method runs transforms on the passed URI so it sits better as a key
 47  
          * (or portion-of-a-key) in hbase. The <code>host</code> portion of the URI
 48  
          * authority is reversed so subdomains sort under their parent domain. The
 49  
          * returned String is an opaque URI of an artificial <code>r:</code> scheme
 50  
          * to prevent the result being considered an URI of the original scheme.
 51  
          * Here is an example of the transform: The url
 52  
          * <code>http://lucene.apache.org/index.html?query=something#middle<code> is
 53  
          * returned as
 54  
          * <code>r:http://org.apache.lucene/index.html?query=something#middle</code>
 55  
          * The transforms are reversible. No transform is done if passed URI is not
 56  
          * hierarchical.
 57  
          * 
 58  
          * <p>
 59  
          * If authority <code>userinfo</code> is present, will mess up the sort
 60  
          * (until we do more work).
 61  
          * </p>
 62  
          * 
 63  
          * @param u
 64  
          *            URL to transform.
 65  
          * @return An opaque URI of artificial 'r' scheme with host portion of URI
 66  
          *         authority reversed (if present).
 67  
          * @see #keyToUri(String)
 68  
          * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>
 69  
          */
 70  
 
 71  
         public static String createKey(final String u, String scheme) {
 72  0
                 if (scheme != null && scheme.length() > 0 && u.startsWith(scheme)) {
 73  0
                         throw new IllegalArgumentException("Key already starts with a scheme: " + scheme);
 74  
                 }
 75  0
                 Matcher m = getURIMatcher(u);
 76  0
                 if (m == null || !m.matches()) {
 77  
                         // If no match, return original String.
 78  0
                         return u;
 79  
                 }
 80  0
                 return scheme + m.group(1) + reverseHostname(m.group(2)) + m.group(3);
 81  
         }
 82  
 
 83  
         /**
 84  
          * Reverse the {@link #createKey(String)} transform.
 85  
          * 
 86  
          * @param s
 87  
          *            <code>URI</code> made by {@link #createKey(String)}.
 88  
          * @return 'Restored' URI made by reversing the {@link #createKey(String)}
 89  
          *         transform.
 90  
          */
 91  
         public static String keyToUri(final String s, final String scheme) {
 92  0
                 if (scheme == null || s == null) {
 93  0
                         return s;
 94  0
                 } else if (!s.toLowerCase().startsWith(scheme.toLowerCase())) {
 95  0
                         return s;
 96  
                 }
 97  
                 // here we have a matching scheme
 98  0
                 Matcher uriMatchObject = getURIMatcher(s.substring(scheme.length()));
 99  0
                 if (uriMatchObject == null || !uriMatchObject.matches()) {
 100  
                         // If no match, return original String.
 101  0
                         return s;
 102  
                 }
 103  
                 // only return a modified key if we have a matching scheme and both
 104  
                 // arguments are not null
 105  0
                 return uriMatchObject.group(1) + reverseHostname(uriMatchObject.group(2)) + uriMatchObject.group(3);
 106  
         }
 107  
 
 108  
         private static Matcher getURIMatcher(final String uriText) {
 109  0
                 if (uriText == null || uriText.length() <= 0) {
 110  0
                         return null;
 111  
                 }
 112  0
                 return URI_RE_PARSER.matcher(uriText);
 113  
         }
 114  
 
 115  
         public static String reverseHostname(final String hostname) {
 116  0
                 if (hostname == null) {
 117  0
                         return "";
 118  
                 }
 119  0
                 StringBuilder sb = new StringBuilder(hostname.length());
 120  
                 Object next;
 121  0
                 for (StringTokenizer st = new StringTokenizer(hostname, DOMAIN_NAME_DELIMITER, false); st.hasMoreElements();) {
 122  0
                         next = st.nextElement();
 123  
                         // prepend each element to the string buffer object to return a
 124  
                         // revered list of the input.
 125  0
                         if (sb.length() > 0) {
 126  0
                                 sb.insert(0, DOMAIN_NAME_DELIMITER);
 127  
                         }
 128  0
                         sb.insert(0, next);
 129  
                 }
 130  0
                 if (sb.length() != hostname.length()) {
 131  0
                         throw new RuntimeException("given hostname: " + hostname + " was reversed to reflect a revers'ed hostname: " + sb.toString()
 132  
                                 + " but input and output string lengths do not match.  Please debug and fix immediately.");
 133  
                 }
 134  0
                 return sb.toString();
 135  
         }
 136  
 }