정규식 테스트 사이트
1. http://gskinner.com/RegExr/
2. http://www.regexplanet.com/advanced/java/index.html (JAVA용)
참고 정규식
일반 URL + IP 주소
((?i)\b(((?:https?:)|(?:ftp?:)//|www\d{0,3}[.]|([a-z0-9\-]{2,}[.](([a-z0-9\-]{2,}[.][a-z0-9\-]{2,})|[a-z0-9\-]{2,}))|([a-z0-9\-]{2,}[.])|([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})(\.([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})){3}|(\w|\-)+\..{2,4}(?:\..{2,3})?(?:$|(?=\/))|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))|([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})(\.([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})){3}|(\w|\-)+\..{2,4}(?:\..{2,3})?(?:$|(?=\/)))
아래는 Android 정규식이다.
(주소 : http://grepcode.com/file/repository.grepcode.com/java/ext/com.google.android/android/2.0_r1/android/text/util/Regex.java#Regex.0WEB_URL_PATTERN)
TOP_LEVEL_DOMAIN_PATTERN
"((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(biz|b[abdefghijmnorstvwyz])"
+ "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(info|int|i[delmnoqrst])"
+ "|(jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(name|net|n[acefgilopruz])"
+ "|(org|om)"
+ "|(pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw])");
WEB_URL_PATTERN
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ "(?:" // plus top level domain
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(?:biz|b[abdefghijmnorstvwyz])"
+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(?:edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(?:gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(?:info|int|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(?:name|net|n[acefgilopruz])"
+ "|(?:org|om)"
+ "|(?:pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw]))"
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9])))"
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ "(?:\\b|$)"); // and finally, a word boundary or end of
// input. This is to stop foo.sure from
// matching as foo.su
1. http://gskinner.com/RegExr/
2. http://www.regexplanet.com/advanced/java/index.html (JAVA용)
참고 정규식
일반 URL + IP 주소
((?i)\b(((?:https?:)|(?:ftp?:)//|www\d{0,3}[.]|([a-z0-9\-]{2,}[.](([a-z0-9\-]{2,}[.][a-z0-9\-]{2,})|[a-z0-9\-]{2,}))|([a-z0-9\-]{2,}[.])|([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})(\.([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})){3}|(\w|\-)+\..{2,4}(?:\..{2,3})?(?:$|(?=\/))|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))|([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})(\.([2]([5][0-5]|[0-4][0-9])|[0-1]?[0-9]{1,2})){3}|(\w|\-)+\..{2,4}(?:\..{2,3})?(?:$|(?=\/)))
아래는 Android 정규식이다.
(주소 : http://grepcode.com/file/repository.grepcode.com/java/ext/com.google.android/android/2.0_r1/android/text/util/Regex.java#Regex.0WEB_URL_PATTERN)
TOP_LEVEL_DOMAIN_PATTERN
"((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(biz|b[abdefghijmnorstvwyz])"
+ "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(info|int|i[delmnoqrst])"
+ "|(jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(name|net|n[acefgilopruz])"
+ "|(org|om)"
+ "|(pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw])");
WEB_URL_PATTERN
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ "(?:" // plus top level domain
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(?:biz|b[abdefghijmnorstvwyz])"
+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(?:edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(?:gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(?:info|int|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(?:name|net|n[acefgilopruz])"
+ "|(?:org|om)"
+ "|(?:pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw]))"
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9])))"
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ "(?:\\b|$)"); // and finally, a word boundary or end of
// input. This is to stop foo.sure from
// matching as foo.su
IP_ADDRESS_PATTERN
"((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9]))");
DOMAIN_NAME_PATTERN
"(((([a-zA-Z0-9][a-zA-Z0-9\\-]*)*[a-zA-Z0-9]\\.)+"
+ TOP_LEVEL_DOMAIN_PATTERN + ")|"
+ IP_ADDRESS_PATTERN + ")");
EMAIL_ADDRESS_PATTERN
"[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}" +
"\\@" +
"[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
"(" +
"\\." +
"[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
")+"
PHONE_PATTERN
"(\\+[0-9]+[\\- \\.]*)?" // +<digits><sdd>*
+ "(\\([0-9]+\\)[\\- \\.]*)?" // (<digits>)<sdd>*
+ "([0-9][0-9\\- \\.][0-9\\- \\.]+[0-9])" // <digit><digit|sdd>+<digit>