sed '/^$/d' and grep -Ev '^$' failed to remove blank lines
I have a file like this, containing multiple blank lines (as displayed in VS Code):
$ tail -n 20 draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
I tried multiple methods to remove the blank lines.
grep -v -e '^$' failed:
$ tail -n 20 draft3.py | grep -v -e '^$'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
grep -Ev "^$" failed:
$ tail -n 20 draft3.py | grep -Ev "^$"
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
sed '/^$/d' failed:
$ tail -n 20 draft3.py | sed '/^$/d'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
What's the problem? How can I remove the blank lines?
command-line text-processing grep sed
add a comment |
I have such a file with multiple blank lines in vscode
$ tail -n 20 draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Tried multiple methods to remove the blank lines
`grep -v -e '^$' failed
$ tail -n 20 draft3.py | grep -v -e '^$'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`grep -Ev "^$" failed
$ tail -n 20 draft3.py | grep -Ev "^$"
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`sed '/^$/d' failed
$ tail -n 20 draft3.py | sed '/^$/d'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
What's the problem? How could remove the blank lines?
command-line text-processing grep sed
add a comment |
I have such a file with multiple blank lines in vscode
$ tail -n 20 draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Tried multiple methods to remove the blank lines
`grep -v -e '^$' failed
$ tail -n 20 draft3.py | grep -v -e '^$'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`grep -Ev "^$" failed
$ tail -n 20 draft3.py | grep -Ev "^$"
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`sed '/^$/d' failed
$ tail -n 20 draft3.py | sed '/^$/d'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
What's the problem? How could remove the blank lines?
command-line text-processing grep sed
I have such a file with multiple blank lines in vscode
$ tail -n 20 draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Tried multiple methods to remove the blank lines
`grep -v -e '^$' failed
$ tail -n 20 draft3.py | grep -v -e '^$'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`grep -Ev "^$" failed
$ tail -n 20 draft3.py | grep -Ev "^$"
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
`sed '/^$/d' failed
$ tail -n 20 draft3.py | sed '/^$/d'
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
What's the problem? How could remove the blank lines?
command-line text-processing grep sed
command-line text-processing grep sed
edited yesterday
Sergiy Kolodyazhnyy
74.5k9155325
74.5k9155325
asked yesterday
AliceAlice
567111
567111
add a comment |
add a comment |
2 Answers
2
active
oldest
votes
Presumably you want to remove not only empty lines, but also lines with only whitespace characters. For that, use:
sed '/^\s*$/d' # or respectively
grep -v '^\s*$'
The sed command deletes (d) every line that consists only of any number (*) of whitespace characters (\s), which includes completely empty lines. grep -v outputs every line which does not match the expression.
Example usage
$ sed '/^\s*$/d' <draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
add a comment |
grep -v '^$'
will remove empty lines. But what if we have spaces or tabs in some lines ? For example I added 3 spaces to parts of your text, and if we do cat -A
we will see that it shows line terminator $
, but it will be offset.
$
mid = (lo + hi) // 2$
$
if x == a[mid]:$
return x$
if x > a[mid]:$
The second line there has 3 spaces, first one doesn't. So we also want to use [[:blank:]]
character class to account for those as well:
$ grep -v '^[[:blank:]]*$' text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Now you should see that the line with 3 added spaces is gone. The *
signifies zero or more repetitions of the characters, so the pattern ^[[:blank:]]*$
also implies ^$
when there are zero whitespace or tab characters on the line. So this pattern handles both truly empty and seemingly empty lines. It also applies exactly the same to grep
or sed
, because we're using basic regex expressions and [[:blank:]]
is one of the POSIX character classes, so it is portable.
We could also do something like this in python but without regex patterns:
$ python3 -c 'import sys; print("\n".join([ l.rstrip() for l in sys.stdin if l.strip().split() ]))' < text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Why does this work ? Because .split()
on a string will split at whitespaces to extract non-whitespace tokens. If a line contains only spaces, the resulting list from .split()
will be empty.
As noted by ilkkachu in the comments, the issue can also occur if you use CRLF line endings ( used in DOS/Windows text files). It is easy to see if the file uses CRLF line endings via cat -A
, they will be marked as ^M
. For example,
$ printf 'hello\n\r\nWorld\n \r\ntest\n\nnewtest\n' | cat -A
hello$
^M$
World$
^M$
test$
$
newtest$
One thing that could be done to account for carriage return is this:
$ printf 'hello\n\r\nWorld\n \ntest\n\nnewtest\n' | sed '/^[[:blank:]]*\r*$/d'
hello
World
test
newtest
It may be simpler to first use a dos2unix
utility designed specifically for converting DOS files to Unix files, and then use sed
and grep
. See ByteCommander's answer that shows example of how to do that.
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
oh, I just noticed you used [:blank:] and not [:space:]. The latter would match the CR too.
– ilkkachu
yesterday
add a comment |
Your Answer
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "89"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2faskubuntu.com%2fquestions%2f1128019%2fsed-d-and-grep-ev-failed-to-remove-blank-lines%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
Presumably you want to remove not only empty lines, but also lines with only whitespace characters. For that, use:
sed '/^\s*$/d' # or respectively
grep -v '^\s*$'
The sed command deletes (d) every line that consists only of any number (*) of whitespace characters (\s), which includes completely empty lines. grep -v outputs every line which does not match the expression.
Example usage
$ sed '/^\s*$/d' <draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
add a comment |
Presumably you want to remove not only empty lines, but also lines with only whitespace characters. For that, use:
sed '/^\s*$/d' # or respectively
grep -v '^\s*$'
The sed command deletes (d) every line that consists only of any number (*) of whitespace characters (\s), which includes completely empty lines. grep -v outputs every line which does not match the expression.
Example usage
$ sed '/^\s*$/d' <draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
add a comment |
Presumably you want to remove not only empty lines, but also lines with only whitespace characters. For that, use:
sed '/^\s*$/d' # or respectively
grep -v '^\s*$'
The sed command deletes (d) every line that consists only of any number (*) of whitespace characters (\s), which includes completely empty lines. grep -v outputs every line which does not match the expression.
Example usage
$ sed '/^\s*$/d' <draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Presumably you want to remove not only empty lines, but also lines with only whitespace characters. For that, use:
sed '/^\s*$/d' # or respectively
grep -v '^\s*$'
The sed command deletes (d) every line that consists only of any number (*) of whitespace characters (\s), which includes completely empty lines. grep -v outputs every line which does not match the expression.
Example usage
$ sed '/^\s*$/d' <draft3.py
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
edited yesterday
answered yesterday
dessertdessert
24.9k672105
24.9k672105
add a comment |
add a comment |
grep -v '^$'
will remove empty lines. But what if we have spaces or tabs in some lines ? For example I added 3 spaces to parts of your text, and if we do cat -A
we will see that it shows line terminator $
, but it will be offset.
$
mid = (lo + hi) // 2$
$
if x == a[mid]:$
return x$
if x > a[mid]:$
The second line there has 3 spaces, first one doesn't. So we also want to use [[:blank:]]
character class to account for those as well:
$ grep -v '^[[:blank:]]*$' text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Now you should see that the line with 3 added spaces is gone. The *
signifies zero or more repetitions of the characters, so the pattern ^[[:blank:]]*$
also implies ^$
when there are zero whitespace or tab characters on the line. So this pattern handles both truly empty and seemingly empty lines. It also applies exactly the same to grep
or sed
, because we're using basic regex expressions and [[:blank:]]
is one of the POSIX character classes, so it is portable.
We could also do something like this in python but without regex patterns:
$ python3 -c 'import sys; print("\n".join([ l.rstrip() for l in sys.stdin if l.strip().split() ]))' < text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Why does this work ? Because .split()
on a string will split at whitespaces to extract non-whitespace tokens. If a line contains only spaces, the resulting list from .split()
will be empty.
As noted by ilkkachu in the comments, the issue can also occur if you use CRLF line endings ( used in DOS/Windows text files). It is easy to see if the file uses CRLF line endings via cat -A
, they will be marked as ^M
. For example,
$ printf 'hello\n\r\nWorld\n \r\ntest\n\nnewtest\n' | cat -A
hello$
^M$
World$
^M$
test$
$
newtest$
One thing that could be done to account for carriage return is this:
$ printf 'hello\n\r\nWorld\n \ntest\n\nnewtest\n' | sed '/^[[:blank:]]*\r*$/d'
hello
World
test
newtest
It may be simpler to first use a dos2unix
utility designed specifically for converting DOS files to Unix files, and then use sed
and grep
. See ByteCommander's answer that shows example of how to do that.
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
oh, I just noticed you used[:blank:]
and not[:space:]
. The latter would match the CR too.
– ilkkachu
yesterday
add a comment |
grep -v '^$'
will remove empty lines. But what if we have spaces or tabs in some lines ? For example I added 3 spaces to parts of your text, and if we do cat -A
we will see that it shows line terminator $
, but it will be offset.
$
mid = (lo + hi) // 2$
$
if x == a[mid]:$
return x$
if x > a[mid]:$
The second line there has 3 spaces, first one doesn't. So we also want to use [[:blank:]]
character class to account for those as well:
$ grep -v '^[[:blank:]]*$' text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Now you should see that the line with 3 added spaces is gone. The *
signifies zero or more repetitions of the characters, so the pattern ^[[:blank:]]*$
also implies ^$
when there are zero whitespace or tab characters on the line. So this pattern handles both truly empty and seemingly empty lines. It also applies exactly the same to grep
or sed
, because we're using basic regex expressions and [[:blank:]]
is one of the POSIX character classes, so it is portable.
We could also do something like this in python but without regex patterns:
$ python3 -c 'import sys; print("\n".join([ l.rstrip() for l in sys.stdin if l.strip().split() ]))' < text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Why does this work ? Because .split()
on a string will split at whitespaces to extract non-whitespace tokens. If a line contains only spaces, the resulting list from .split()
will be empty.
As noted by ilkkachu in the comments, the issue can also occur if you use CRLF line endings ( used in DOS/Windows text files). It is easy to see if the file uses CRLF line endings via cat -A
, they will be marked as ^M
. For example,
$ printf 'hello\n\r\nWorld\n \r\ntest\n\nnewtest\n' | cat -A
hello$
^M$
World$
^M$
test$
$
newtest$
One thing that could be done to account for carriage return is this:
$ printf 'hello\n\r\nWorld\n \ntest\n\nnewtest\n' | sed '/^[[:blank:]]*\r*$/d'
hello
World
test
newtest
It may be simpler to first use a dos2unix
utility designed specifically for converting DOS files to Unix files, and then use sed
and grep
. See ByteCommander's answer that shows example of how to do that.
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
oh, I just noticed you used[:blank:]
and not[:space:]
. The latter would match the CR too.
– ilkkachu
yesterday
add a comment |
grep -v '^$'
will remove empty lines. But what if we have spaces or tabs in some lines ? For example I added 3 spaces to parts of your text, and if we do cat -A
we will see that it shows line terminator $
, but it will be offset.
$
mid = (lo + hi) // 2$
$
if x == a[mid]:$
return x$
if x > a[mid]:$
The second line there has 3 spaces, first one doesn't. So we also want to use [[:blank:]]
character class to account for those as well:
$ grep -v '^[[:blank:]]*$' text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Now you should see that the line with 3 added spaces is gone. The *
signifies zero or more repetitions of the characters, so the pattern ^[[:blank:]]*$
also implies ^$
when there are zero whitespace or tab characters on the line. So this pattern handles both truly empty and seemingly empty lines. It also applies exactly the same to grep
or sed
, because we're using basic regex expressions and [[:blank:]]
is one of the POSIX character classes, so it is portable.
We could also do something like this in python but without regex patterns:
$ python3 -c 'import sys; print("\n".join([ l.rstrip() for l in sys.stdin if l.strip().split() ]))' < text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Why does this work ? Because .split()
on a string will split at whitespaces to extract non-whitespace tokens. If a line contains only spaces, the resulting list from .split()
will be empty.
As noted by ilkkachu in the comments, the issue can also occur if you use CRLF line endings ( used in DOS/Windows text files). It is easy to see if the file uses CRLF line endings via cat -A
, they will be marked as ^M
. For example,
$ printf 'hello\n\r\nWorld\n \r\ntest\n\nnewtest\n' | cat -A
hello$
^M$
World$
^M$
test$
$
newtest$
One thing that could be done to account for carriage return is this:
$ printf 'hello\n\r\nWorld\n \ntest\n\nnewtest\n' | sed '/^[[:blank:]]*\r*$/d'
hello
World
test
newtest
It may be simpler to first use a dos2unix
utility designed specifically for converting DOS files to Unix files, and then use sed
and grep
. See ByteCommander's answer that shows example of how to do that.
grep -v '^$'
will remove empty lines. But what if we have spaces or tabs in some lines ? For example I added 3 spaces to parts of your text, and if we do cat -A
we will see that it shows line terminator $
, but it will be offset.
$
mid = (lo + hi) // 2$
$
if x == a[mid]:$
return x$
if x > a[mid]:$
The second line there has 3 spaces, first one doesn't. So we also want to use [[:blank:]]
character class to account for those as well:
$ grep -v '^[[:blank:]]*$' text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Now you should see that the line with 3 added spaces is gone. The *
signifies zero or more repetitions of the characters, so the pattern ^[[:blank:]]*$
also implies ^$
when there are zero whitespace or tab characters on the line. So this pattern handles both truly empty and seemingly empty lines. It also applies exactly the same to grep
or sed
, because we're using basic regex expressions and [[:blank:]]
is one of the POSIX character classes, so it is portable.
We could also do something like this in python but without regex patterns:
$ python3 -c 'import sys; print("\n".join([ l.rstrip() for l in sys.stdin if l.strip().split() ]))' < text.txt
hi = len(a)
if lo < 0:
raise ValueError('low must be non-negative')
if lo == hi:
return None
mid = (lo + hi) // 2
if x == a[mid]:
return x
if x > a[mid]:
lo = mid + 1
return self.bi_search(a, x, lo, hi)
if x < a[mid]:
hi = mid
return self.bi_search(a, x, lo, hi)
Why does this work ? Because .split()
on a string will split at whitespaces to extract non-whitespace tokens. If a line contains only spaces, the resulting list from .split()
will be empty.
As noted by ilkkachu in the comments, the issue can also occur if you use CRLF line endings ( used in DOS/Windows text files). It is easy to see if the file uses CRLF line endings via cat -A
, they will be marked as ^M
. For example,
$ printf 'hello\n\r\nWorld\n \r\ntest\n\nnewtest\n' | cat -A
hello$
^M$
World$
^M$
test$
$
newtest$
One thing that could be done to account for carriage return is this:
$ printf 'hello\n\r\nWorld\n \ntest\n\nnewtest\n' | sed '/^[[:blank:]]*\r*$/d'
hello
World
test
newtest
It may be simpler to first use a dos2unix
utility designed specifically for converting DOS files to Unix files, and then use sed
and grep
. See ByteCommander's answer that shows example of how to do that.
edited yesterday
answered yesterday
Sergiy KolodyazhnyySergiy Kolodyazhnyy
74.5k9155325
74.5k9155325
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
oh, I just noticed you used[:blank:]
and not[:space:]
. The latter would match the CR too.
– ilkkachu
yesterday
add a comment |
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
oh, I just noticed you used[:blank:]
and not[:space:]
. The latter would match the CR too.
– ilkkachu
yesterday
1
1
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
"regular" whitespace like spaces could do that, or they might have CRLF line endings
– ilkkachu
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
@ilkkachu Good point, thank you. I've edited to account for CRLF as well
– Sergiy Kolodyazhnyy
yesterday
1
1
oh, I just noticed you used
[:blank:]
and not [:space:]
. The latter would match the CR too.– ilkkachu
yesterday
oh, I just noticed you used
[:blank:]
and not [:space:]
. The latter would match the CR too.– ilkkachu
yesterday
add a comment |
Thanks for contributing an answer to Ask Ubuntu!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2faskubuntu.com%2fquestions%2f1128019%2fsed-d-and-grep-ev-failed-to-remove-blank-lines%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown