概述
Tesseract 是一個 OCR（Optical Character Recognition，光學字符識別）引擎，在這裡我用它來開發 Android 上能識別一張圖片上的股票代碼的 APP 功能。
Github地址
這個庫非常龐大，反正我是看不出怎麼使用在 Android 開發上，於是我找了另一個庫：https://github.com/rmtheis/tess-two ，應該是基於前面的庫製作的。
添加依賴
dependencies {
    // tess-two library used by the OCR code in this article (TessBaseAPI).
    // NOTE(review): `compile` is deprecated in newer Android Gradle plugin versions in
    // favor of `implementation` — confirm against the plugin version this project uses.
    compile group: 'com.rmtheis', name: 'tess-two', version: '8.0.0'
}
布局
布局非常簡單，只有右上角一個導入按鈕：
布局
xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Screen content: result text, an indeterminate progress bar and a preview of the preprocessed image. -->
<RelativeLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:paddingLeft="@dimen/activity_horizontal_margin"
    android:paddingTop="@dimen/activity_vertical_margin"
    android:paddingRight="@dimen/activity_horizontal_margin"
    android:paddingBottom="@dimen/activity_vertical_margin">

    <!-- Shows the recognition result. -->
    <TextView
        android:id="@+id/text"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content" />

    <!-- Progress bar displayed while recognition is running; hidden by default. -->
    <ProgressBar
        android:id="@+id/progressBar"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_centerInParent="true"
        android:indeterminate="true"
        android:visibility="gone" />

    <!-- Shows the picture after preprocessing, i.e. the image that is handed to the recognizer. -->
    <ImageView
        android:id="@+id/imageView"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_alignParentEnd="true"
        android:layout_alignParentRight="true" />

</RelativeLayout>
導入識別庫
先去這裡下載識別庫，少了這個識別庫是沒法使用的，而且不同的識別庫識別準確率也是不一樣的，當你發現準確率低時可以嘗試換一個識別庫，或許會改善。裡面有很多語言的識別庫，其他語言的不需要關心，我們只需要記住開頭 chi_sim 的是簡體中文，chi_tra 是繁體中文，eng 是英語，eus 應該是美式英語（註：在 Tesseract 的語言代碼中，eus 實際對應的是巴斯克語，並非美式英語）。我使用的是 eus.traineddata。
先在項目裡新建 assets 目錄 → tessdata 目錄 → eus.traineddata。
image.png
可以編寫代碼了
直接看代碼
/**
 * Demo activity: the user picks a picture from the gallery, the left third of it is
 * binarized, and the result is passed to Tesseract (tess-two) OCR. Every 5-6 digit
 * number found in the recognized text is listed on screen.
 */
public class MainActivity extends AppCompatActivity {
    private static final String TAG = MainActivity.class.getSimpleName();
    private static final int REQUEST_PICK_PHOTO = 1;
    /** Trained-data set to load ({@code <lang>.traineddata}); "chi_sim" is an alternative. */
    private static final String lang = "eus";
    /** Directory on external storage whose {@code tessdata} sub-folder holds the trained data. */
    private static final String DATA_PATH = Environment.getExternalStorageDirectory().toString() + "/Tesseract/";
    private static final String TESSDATA = "tessdata";
    /**
     * Filter applied to the recognized text: runs of 5-6 digits ending at a word boundary.
     * A broader alternative that also keeps upper-case tokens is
     * {@code "\\d{5,6}\\b|\\b[A-Z_]+\\b"}.
     */
    private static final Pattern STOCK_CODE_PATTERN = Pattern.compile("\\d{5,6}\\b");

    String result = "empty";
    private TextView text;
    private ProgressBar progressBar;
    private ImageView imageView;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        Toolbar toolbar = (Toolbar) findViewById(R.id.toolbar);
        setSupportActionBar(toolbar);
        text = (TextView) findViewById(R.id.text);
        progressBar = (ProgressBar) findViewById(R.id.progressBar);
        imageView = (ImageView) findViewById(R.id.imageView);
    }

    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        getMenuInflater().inflate(R.menu.menu_main, menu);
        return true;
    }

    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        if (item.getItemId() == R.id.dao_ru) {
            // "Import" action: open the gallery so the user can choose a picture.
            pickPhoto();
            return true; // the event is consumed here
        }
        return super.onOptionsItemSelected(item);
    }

    /** Launches the system image picker; the choice is delivered to {@link #onActivityResult}. */
    private void pickPhoto() {
        Intent intent = new Intent(Intent.ACTION_PICK, android.provider.MediaStore.Images.Media.EXTERNAL_CONTENT_URI);
        startActivityForResult(intent, REQUEST_PICK_PHOTO);
    }

    /**
     * Receives the picked image, decodes it and starts the background OCR task.
     * Nothing happens (apart from an error log) when no usable image was returned.
     */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);
        if (requestCode != REQUEST_PICK_PHOTO || resultCode != RESULT_OK) {
            return;
        }
        Uri uri = (data == null) ? null : data.getData();
        String imagePath = getRealImageFilePath(this, uri);
        if (imagePath == null) {
            Log.e(TAG, "Picker returned no image location");
            return;
        }
        Bitmap bitmap = BitmapFactory.decodeFile(imagePath);
        if (bitmap == null) {
            Log.e(TAG, "Unable to decode image " + imagePath);
            return;
        }
        // Copying the trained data, binarizing and recognizing are all slow,
        // so they run together in the background task.
        new MyAsyckTask().execute(bitmap);
    }

    /**
     * Resolves a media {@link Uri} to a file-system path.
     *
     * @param context used to obtain the content resolver
     * @param uri     the picked image, may be {@code null}
     * @return the value of the {@code DATA} column when the provider returns a row with
     *         that column, otherwise {@code uri.getPath()}; {@code null} when
     *         {@code uri} is {@code null}
     */
    public static String getRealImageFilePath(Context context, Uri uri) {
        if (uri == null) {
            return null;
        }
        String[] filePathColumn = {MediaStore.Images.Media.DATA};
        Cursor cursor = context.getContentResolver().query(uri, filePathColumn, null, null, null);
        if (cursor != null) {
            try {
                if (cursor.moveToFirst()) {
                    int columnIndex = cursor.getColumnIndex(filePathColumn[0]);
                    if (columnIndex >= 0) {
                        return cursor.getString(columnIndex);
                    }
                }
            } finally {
                // Close on every path, including the early return above.
                cursor.close();
            }
        }
        return uri.getPath();
    }

    /** Creates {@code path} (including parents) on the device unless it already exists. */
    private void prepareDirectory(String path) {
        File dir = new File(path);
        if (dir.exists()) {
            Log.i(TAG, "Directory " + path + " already exists");
            return;
        }
        if (dir.mkdirs()) {
            Log.i(TAG, "Created directory " + path);
        } else {
            Log.e(TAG, "ERROR: Creation of directory " + path + " failed, check does Android Manifest have permission to write to external storage.");
        }
    }

    /** Makes sure the trained data bundled in the assets is available under {@link #DATA_PATH}. */
    private void prepareTesseract() {
        try {
            prepareDirectory(DATA_PATH + TESSDATA);
        } catch (Exception e) {
            Log.e(TAG, "Unable to prepare directory " + DATA_PATH + TESSDATA, e);
        }
        copyTessDataFiles(TESSDATA);
    }

    /**
     * Copies every file of the asset folder {@code path} to {@code DATA_PATH + path},
     * skipping files that are already present on the device.
     */
    private void copyTessDataFiles(String path) {
        try {
            String[] fileList = getAssets().list(path);
            if (fileList == null) {
                Log.e(TAG, "No assets found under " + path);
                return;
            }
            for (String fileName : fileList) {
                File target = new File(DATA_PATH + path + "/" + fileName);
                if (target.exists()) {
                    continue;
                }
                copyAsset(path + "/" + fileName, target);
                Log.d(TAG, "Copied " + fileName + "to tessdata");
            }
        } catch (IOException e) {
            Log.e(TAG, "Unable to copy files to tessdata " + e.toString());
        }
    }

    /**
     * Copies a single asset to {@code target}. Both streams are closed on every path and
     * a partially written target is deleted, so that a failed copy is retried next time
     * instead of being mistaken for a complete file.
     */
    private void copyAsset(String assetPath, File target) throws IOException {
        InputStream in = getAssets().open(assetPath);
        try {
            OutputStream out = new FileOutputStream(target);
            boolean complete = false;
            try {
                byte[] buf = new byte[1024];
                int len;
                while ((len = in.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
                complete = true;
            } finally {
                try {
                    out.close();
                } finally {
                    if (!complete && !target.delete()) {
                        Log.w(TAG, "Could not remove incomplete file " + target);
                    }
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Runs OCR on {@code bitmap}.
     *
     * @return the recognized text, or {@code "empty result"} when the engine could not be
     *         created or initialised, or recognition failed
     */
    private String extractText(Bitmap bitmap) {
        String extractedText = "empty result";
        TessBaseAPI api = null;
        try {
            api = new TessBaseAPI();
            if (!api.init(DATA_PATH, lang)) {
                Log.e(TAG, "Unable to initialise Tesseract with " + lang + " data from " + DATA_PATH);
                return extractedText;
            }
            // Whitelist: only these characters may appear in the output.
            // Use "0123456789" instead to recognize digits only; a blacklist can be set
            // the same way through TessBaseAPI.VAR_CHAR_BLACKLIST.
            api.setVariable(TessBaseAPI.VAR_CHAR_WHITELIST, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_");
            Log.d(TAG, "Training file loaded");
            api.setImage(bitmap);
            String recognized = api.getUTF8Text();
            if (recognized != null) {
                extractedText = recognized;
            }
        } catch (Exception e) {
            Log.e(TAG, "Error in recognizing text.", e);
        } finally {
            if (api != null) {
                api.end();
            }
        }
        return extractedText;
    }

    /**
     * Background task: prepares the trained data, binarizes the picked bitmap, publishes
     * the processed bitmap for preview and returns the recognized text.
     */
    private class MyAsyckTask extends AsyncTask<Bitmap, Bitmap, String> {
        @Override
        protected void onPreExecute() {
            progressBar.setVisibility(View.VISIBLE);
            super.onPreExecute();
        }

        @Override
        protected String doInBackground(final Bitmap... params) {
            // The trained data must be on the device before the engine is initialised.
            prepareTesseract();
            // A black-and-white picture is easier to recognize.
            Bitmap processed = toHeibai(params[0]);
            publishProgress(processed);
            return extractText(processed);
        }

        @Override
        protected void onProgressUpdate(Bitmap... values) {
            // Runs on the UI thread: show the image that is being recognized.
            imageView.setImageBitmap(values[0]);
        }

        @Override
        protected void onPostExecute(String s) {
            progressBar.setVisibility(View.GONE);
            StringBuilder formatStringBuilder = new StringBuilder();
            if (s != null) {
                Matcher m = STOCK_CODE_PATTERN.matcher(s);
                while (m.find()) {
                    formatStringBuilder.append(m.group()).append("\n");
                }
            }
            text.setText(formatStringBuilder);
        }
    }

    /**
     * Produces a pure black/white copy of the left third of {@code mBitmap}.
     *
     * <p>Pixels are visited column by column. A pixel whose RGB average is at least 190
     * becomes white, one at or below 100 becomes black. A pixel in between becomes black
     * when more white than black pixels have been seen so far (light background) and
     * white otherwise. The input bitmap is recycled before returning.
     *
     * @param mBitmap source picture; must not be {@code null}; recycled by this call
     * @return the binarized bitmap, one third of the source width (at least 1 px) and
     *         the full source height
     * @throws NullPointerException if {@code mBitmap} is {@code null}
     */
    public static Bitmap toHeibai(Bitmap mBitmap) {
        if (mBitmap == null) {
            throw new NullPointerException("mBitmap must not be null");
        }
        // Only the left third of the picture is kept; never let the width drop to 0.
        final int width = Math.max(1, mBitmap.getWidth() / 3);
        final int height = mBitmap.getHeight();

        // Bulk read/write is much cheaper than one getPixel/setPixel call per pixel.
        int[] pixels = new int[width * height];
        mBitmap.getPixels(pixels, 0, width, 0, 0, width, height);

        int wTime = 0; // white pixels seen so far, used to detect a light background
        int bTime = 0; // black pixels seen so far, used to detect a dark background
        // Column-major order matters: the mid-gray decision depends on the running counts.
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                int index = y * width + x;
                int color = pixels[index];
                int avg = (Color.red(color) + Color.green(color) + Color.blue(color)) / 3;
                int level;
                // 190 controls how dark text may be and still count as text; the stock
                // codes in this project's screenshots are fairly dim, so 190 works well.
                if (avg >= 190) {
                    level = 255;
                    wTime++;
                } else if (avg > 100) {
                    level = (wTime > bTime) ? 0 : 255;
                } else {
                    level = 0;
                    bTime++;
                }
                pixels[index] = Color.argb(255, level, level, level);
            }
        }

        Bitmap bmpReturn = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
        bmpReturn.setPixels(pixels, 0, width, 0, 0, width, height);
        mBitmap.recycle();
        return ThumbnailUtils.extractThumbnail(bmpReturn, width, height);
    }
}
相信註釋已經很明白了。來看圖（不知道用什麼工具製作效果圖，有小夥伴知道的話告訴我一聲）
選擇圖片
識(shí)別結(jié)果和處理過后的圖片
會出現一些識別錯誤的東西，但是沒有關係，可以完善正則去匹配，也可以完善功能讓用戶選擇需要的。