Commit 092c5d08 authored by Tom
Browse files

changed default range for line cleanup

parent 6e787002
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -843,7 +843,7 @@ def latin_kernel(line,scale=1.0,r=1.2,debug=0):
mask[y0:bottoms[i],i] = 1
return mask
def latin_filter(line,scale=1.0,r=1.2,debug=0):
def latin_filter(line,scale=1.0,r=1.5,debug=0):
bin = (line>0.5*amax(line))
mask = latin_kernel(bin,scale=scale,r=r,debug=debug)
mask = psegutils.keep_marked(bin,mask)
......
......@@ -270,7 +270,10 @@ class CmodelLineRecognizer:
self.min_xheight = 10
self.max_xheight = 40
self.check_white_on_black = 1
self.noise_threshold = 8
# components smaller than this are removed
self.noise_threshold = 3
# components up to xheight*r are captured
self.latin_r = 1.5
#self.segmenter = ocrolseg.DpSegmenter()
#self.segmenter0 = ocrolseg.SegmentLineByGCCS()
self.segmenter = lineseg.DPSegmentLine()
......@@ -314,7 +317,7 @@ class CmodelLineRecognizer:
# clean up connected components around the edges
if self.latin_cleaner:
image = 1-image
image = common.latin_filter(image)
image = common.latin_filter(image,r=self.latin_r)
image = common.remove_noise(image,self.noise_threshold)
image = 1-image
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment